diff --git a/.github/workflows/build_packages.yml b/.github/workflows/build_packages.yml index 3a660dec9..51feda808 100644 --- a/.github/workflows/build_packages.yml +++ b/.github/workflows/build_packages.yml @@ -33,7 +33,6 @@ jobs: uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: python-version: 3.12 - cache: "pip" - name: Install Python packages run: | pip install packaging @@ -134,6 +133,7 @@ jobs: env: OUTPUT_DIR: "${{ github.workspace }}/bindist" OVERRIDE_PYTHON_VERSIONS: "${{ matrix.python-version }}" + SHORTFIN_ENABLE_TRACING: "ON" run: | [ -e ./bindist/* ] && rm ./bindist/* ./c/shortfin/build_tools/build_linux_package.sh diff --git a/.github/workflows/ci-shark-ai.yml b/.github/workflows/ci-shark-ai.yml deleted file mode 100644 index fc85a76a7..000000000 --- a/.github/workflows/ci-shark-ai.yml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright 2024 Advanced Micro Devices, Inc. -# -# Licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -name: CI - shark-ai - -on: - workflow_dispatch: - pull_request: - push: - branches: - - main - -concurrency: - # A PR number if a pull request and otherwise the commit hash. This cancels - # queued and in-progress runs for the same PR (presubmit) or commit - # (postsubmit). The workflow name is prepended to avoid conflicts between - # different workflows. - group: ${{ github.workflow }}-${{ github.event.number || github.sha }} - cancel-in-progress: true - -jobs: - test_shortfin_llm_server: - name: "Integration Tests - Shortfin LLM Server" - strategy: - matrix: - version: [3.11] - fail-fast: false - runs-on: nodai-amdgpu-mi250-x86-64 - defaults: - run: - shell: bash - env: - PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache" - steps: - - name: "Setting up Python" - id: setup_python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 - with: - python-version: ${{matrix.version}} - - - name: "Checkout Code" - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - - name: Cache Pip Packages - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 - id: cache-pip - with: - path: ${{ env.PIP_CACHE_DIR }} - key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','shortfin/requirements*.txt','sharktank/requirements*.txt') }} - - - name: Install pip deps - run: | - python -m pip install --no-compile --upgrade pip - # Note: We install in three steps in order to satisfy requirements - # from non default locations first. Installing the PyTorch CPU - # wheels saves multiple minutes and a lot of bandwidth on runner setup. - pip install --no-compile -r pytorch-cpu-requirements.txt - pip install --no-compile -r requirements.txt -e sharktank/ shortfin/ - - # Install latest iree-tubrine. - pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \ - -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" - - # Try with the latest IREE nightly releases, not what iree-turbine pins. - # We could also pin to a known working or stable version. - # This should eventually stabilize. Do the best we can for now. 
- pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \ - iree-base-compiler \ - iree-base-runtime - - - name: Run LLM Integration Tests - run: pytest -v app_tests/integration_tests/llm/shortfin --log-cli-level=INFO diff --git a/.github/workflows/pkgci.yml b/.github/workflows/pkgci.yml new file mode 100644 index 000000000..765d8c0b7 --- /dev/null +++ b/.github/workflows/pkgci.yml @@ -0,0 +1,35 @@ +# Copyright 2024 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +name: PkgCI + +on: + workflow_dispatch: + pull_request: + push: + branches: + - main + +permissions: + contents: read + +concurrency: + # A PR number if a pull request and otherwise the commit hash. This cancels + # queued and in-progress runs for the same PR (presubmit) or commit + # (postsubmit). The workflow name is prepended to avoid conflicts between + # different workflows. + group: ${{ github.workflow }}-${{ github.event.number || github.sha }} + cancel-in-progress: true + +jobs: + build_packages: + name: Build Packages + uses: ./.github/workflows/pkgci_build_packages.yml + + test_shark_ai: + name: Test shark-ai + needs: [build_packages] + uses: ./.github/workflows/pkgci_shark_ai.yml diff --git a/.github/workflows/pkgci_build_packages.yml b/.github/workflows/pkgci_build_packages.yml new file mode 100644 index 000000000..6a7884273 --- /dev/null +++ b/.github/workflows/pkgci_build_packages.yml @@ -0,0 +1,71 @@ +# Copyright 2024 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +name: PkgCI Build Packages + +on: + workflow_call: + +permissions: + contents: read + +jobs: + build_packages: + runs-on: ubuntu-24.04 + env: + CACHE_DIR: ${{ github.workspace }}/.shark-ai-cache + OUTPUT_DIR: "${{ github.workspace }}/bindist" + + steps: + - name: Checkout repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: Setup Python + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + with: + python-version: 3.12 + + # Setup metadata. + - name: Install Python packages + run: pip install packaging + - name: Generate dev package versions + id: version_rc + run: | + version_suffix=".dev0+${{ github.sha }}" + echo "version_suffix=${version_suffix}" >> $GITHUB_ENV + sharktank_package_version=$(python3 build_tools/python_deploy/compute_local_version.py --version-suffix=${version_suffix} sharktank) + shortfin_package_version=$(python3 build_tools/python_deploy/compute_local_version.py --version-suffix=${version_suffix} shortfin) + sharkai_package_version=$(python3 build_tools/python_deploy/compute_common_version.py -rc --version-suffix=${version_suffix} --write-json) + + - name: Enable cache + uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 + with: + path: ${{ env.CACHE_DIR }} + key: shark-ai-pkgci-linux-packages-x86_64-v1-${{ github.sha }} + restore-keys: | + shark-ai-pkgci-linux-packages-x86_64-v1- + + # Build dev packages. 
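Editor's note: the "Generate dev package versions" step above builds a PEP 440 dev/local version by appending `.dev0+<sha>` to each project's base version. A minimal sketch of how such a version behaves, using the `packaging` library the workflow installs; the `3.1.0` base and the shortened sha are hypothetical, the real values come from `compute_local_version.py` and `${{ github.sha }}`:

```python
from packaging.version import Version

base = "3.1.0"              # hypothetical; the real base comes from compute_local_version.py
suffix = ".dev0+51feda808"  # ".dev0+${{ github.sha }}" in the workflow (sha shortened here)

dev_version = Version(base + suffix)
release = Version(base)

print(dev_version)                # 3.1.0.dev0+51feda808
print(dev_version.is_devrelease)  # True
print(dev_version.local)          # "51feda808"
print(dev_version < release)      # True: dev builds sort before the plain release
```

This is why installing the dev wheels later requires `--pre` (or pinned filenames): by PEP 440 ordering, the `.dev0` builds are pre-releases of the upcoming version.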
+ - name: Build sharktank + run: | + ./sharktank/build_tools/build_linux_package.sh + - name: Build shortfin + env: + OVERRIDE_PYTHON_VERSIONS: cp311-cp311 + SHORTFIN_ENABLE_TRACING: "OFF" + run: | + ./shortfin/build_tools/build_linux_package.sh + - name: Build shark-ai + run: | + ./build_tools/python_deploy/write_requirements.py --version-suffix=${version_suffix} + ./shark-ai/build_tools/build_linux_package.sh + + # Upload. + - name: Upload python wheels + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + with: + if-no-files-found: error + name: linux_x86_64_dev_packages + path: bindist diff --git a/.github/workflows/pkgci_shark_ai.yml b/.github/workflows/pkgci_shark_ai.yml new file mode 100644 index 000000000..7a01c941f --- /dev/null +++ b/.github/workflows/pkgci_shark_ai.yml @@ -0,0 +1,74 @@ +# Copyright 2024 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +name: PkgCI - shark-ai + +on: + workflow_call: + inputs: + artifact_run_id: + type: string + default: "" + workflow_dispatch: + inputs: + artifact_run_id: + type: string + description: "Id for a workflow run that produced dev packages" + default: "" + +concurrency: + # A PR number if a pull request and otherwise the commit hash. This cancels + # queued and in-progress runs for the same PR (presubmit) or commit + # (postsubmit). The workflow name is prepended to avoid conflicts between + # different workflows. + group: ${{ github.workflow }}-${{ github.event.number || github.sha }} + cancel-in-progress: true + +jobs: + test_shortfin_llm_server: + name: "Integration Tests - Shortfin LLM Server" + strategy: + matrix: + version: [3.11] + fail-fast: false + runs-on: nodai-amdgpu-mi250-x86-64 + defaults: + run: + shell: bash + env: + PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages + VENV_DIR: ${{ github.workspace }}/.venv + steps: + - name: "Checkout Code" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: "Setting up Python" + id: setup_python + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + with: + python-version: ${{matrix.version}} + + - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: linux_x86_64_dev_packages + path: ${{ env.PACKAGE_DOWNLOAD_DIR }} + - name: Setup venv + run: | + ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \ + --artifact-path=${PACKAGE_DOWNLOAD_DIR} \ + --fetch-gh-workflow=${{ inputs.artifact_run_id }} + + - name: Install nightly IREE packages + run: | + source ${VENV_DIR}/bin/activate + uv pip install "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine" + uv pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \ + iree-base-compiler \ + iree-base-runtime + + - name: Run LLM Integration Tests + run: | + source ${VENV_DIR}/bin/activate + pytest -v app_tests/integration_tests/llm/shortfin --log-cli-level=INFO diff --git a/build_tools/pkgci/setup_venv.py b/build_tools/pkgci/setup_venv.py new file mode 100755 index 000000000..9c4caa041 --- /dev/null +++ b/build_tools/pkgci/setup_venv.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python3 +# Copyright 2024 Advanced Micro Devices, Inc. +# Copyright 2023 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
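Editor's note: the artifact name `linux_x86_64_dev_packages` uploaded above is not arbitrary; `setup_venv.py` (below) reconstructs the same prefix from the host platform when it needs to fetch packages, so the upload and download sides must agree. A small sketch of that derivation, mirroring the `artifact_prefix` logic in the script:

```python
import platform

# Mirrors how setup_venv.py derives the artifact name it looks for,
# e.g. "linux_x86_64_dev_packages" on the x86_64 Linux CI runners.
artifact_prefix = f"{platform.system().lower()}_{platform.machine()}"
print(f"{artifact_prefix}_dev_packages")
```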
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +"""Sets up a Python venv with shark-ai packages from a workflow run. + +There are several modes in which to use this script: + +* Within a workflow triggered by `workflow_call`, an artifact action will + typically be used to fetch relevant package artifacts. Specify the fetched + location with `--artifact-path=`: + + ```yml + - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + name: linux_x86_64_release_packages + path: ${{ env.PACKAGE_DOWNLOAD_DIR }} + - name: Setup venv + run: | + ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \ + --artifact-path=${PACKAGE_DOWNLOAD_DIR} + ``` + +* Within a workflow triggered by `workflow_dispatch`, pass `artifact_run_id` as + an input that developers must specify when running the workflow: + + ```yml + on: + workflow_dispatch: + inputs: + artifact_run_id: + type: string + default: "" + + ... + steps: + - name: Setup venv + run: | + ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \ + --fetch-gh-workflow=${{ inputs.artifact_run_id }} + ``` + + (Note that these two modes are often combined to allow for workflow testing) + +* Locally, the `--fetch-gh-workflow=WORKFLOW_ID` can be used to download and + setup the venv from a specific workflow run in one step: + + + ```bash + python3.11 ./build_tools/pkgci/setup_venv.py /tmp/.venv --fetch-gh-workflow=12056182052 + ``` + +* Locally, the `--fetch-git-ref=GIT_REF` can be used to download and setup the + venv from the latest workflow run for a given ref (commit) in one step: + + ```bash + python3.11 ./build_tools/pkgci/setup_venv.py /tmp/.venv --fetch-git-ref=main + ``` + +You must have the `gh` command line tool installed and authenticated if you +will be fetching artifacts. +""" + +from glob import glob +from pathlib import Path +from typing import Optional, Dict, Tuple + +import argparse +import functools +import json +import os +import platform +import subprocess +import sys +import tempfile +import zipfile + +THIS_DIR = Path(__file__).parent.resolve() +REPO_ROOT = THIS_DIR.parent.parent + + +def parse_arguments(argv=None): + parser = argparse.ArgumentParser(description="Setup venv") + parser.add_argument( + "venv_dir", type=Path, help="Directory in which to create the venv" + ) + parser.add_argument("--artifact-path", help="Path in which to find/fetch artifacts") + parser.add_argument( + "--packages", + help="Comma-delimited list of packages to install, in order", + default="shark-ai,shortfin,sharktank", + ) + parser.add_argument( + "--install-using-index", + help="The default mode installs with `--no-index` to be sure that only " + "our packages are installed. 
Setting this flag removes that option, " + "more closely matching the behavior that users will see when they " + "install published packages.", + action="store_true", + ) + + fetch_group = parser.add_mutually_exclusive_group() + fetch_group.add_argument( + "--fetch-gh-workflow", help="Fetch artifacts from a GitHub workflow" + ) + fetch_group.add_argument("--fetch-git-ref", help="Fetch artifacts for a git ref") + + args = parser.parse_args(argv) + return args + + +def get_latest_workflow_run_id_for_ref(ref: str) -> int: + print(f"Normalizing ref: {ref}") + normalized_ref = ( + subprocess.check_output(["git", "rev-parse", ref], cwd=REPO_ROOT) + .decode() + .strip() + ) + + print(f"Fetching artifacts for normalized ref: {normalized_ref}") + base_path = f"/repos/nod-ai/shark-ai" + workflow_run_args = [ + "gh", + "api", + "-H", + "Accept: application/vnd.github+json", + "-H", + "X-GitHub-Api-Version: 2022-11-28", + f"{base_path}/actions/workflows/pkgci.yml/runs?head_sha={normalized_ref}", + ] + print(f"Running command to list workflow runs:\n {' '.join(workflow_run_args)}") + workflow_run_output = subprocess.check_output(workflow_run_args) + workflow_run_json_output = json.loads(workflow_run_output) + if workflow_run_json_output["total_count"] == 0: + raise RuntimeError("Workflow did not run at this commit") + + latest_run = workflow_run_json_output["workflow_runs"][-1] + print(f"Found workflow run: {latest_run['html_url']}") + return latest_run["id"] + + +@functools.lru_cache +def list_gh_artifacts(run_id: str) -> Dict[str, str]: + print(f"Fetching artifacts for workflow run {run_id}") + base_path = f"/repos/nod-ai/shark-ai" + output = subprocess.check_output( + [ + "gh", + "api", + "-H", + "Accept: application/vnd.github+json", + "-H", + "X-GitHub-Api-Version: 2022-11-28", + f"{base_path}/actions/runs/{run_id}/artifacts", + ] + ) + data = json.loads(output) + # Uncomment to debug: + # print(json.dumps(data, indent=2)) + artifacts = { + rec["name"]: f"{base_path}/actions/artifacts/{rec['id']}/zip" + for rec in data["artifacts"] + } + print("Found artifacts:") + for k, v in artifacts.items(): + print(f" {k}: {v}") + return artifacts + + +def fetch_gh_artifact(api_path: str, file: Path): + print(f"Downloading artifact {api_path}") + contents = subprocess.check_output( + [ + "gh", + "api", + "-H", + "Accept: application/vnd.github+json", + "-H", + "X-GitHub-Api-Version: 2022-11-28", + api_path, + ] + ) + file.write_bytes(contents) + + +def find_venv_python(venv_path: Path) -> Optional[Path]: + paths = [venv_path / "bin" / "python", venv_path / "Scripts" / "python.exe"] + for p in paths: + if p.exists(): + return p + return None + + +def install_with_index(python_exe, wheels): + # Install each of the built wheels, allowing dependencies and an index. + # Note that --pre pulls in prerelease versions of dependencies too, like + # numpy. We could try a solution like https://stackoverflow.com/a/76124424. + for artifact_path, package_name in wheels: + cmd = [ + "uv", + "pip", + "install", + "--pre", + "-f", + str(artifact_path), + package_name, + "--python", + str(python_exe), + ] + print(f"\nRunning command: {' '.join([str(c) for c in cmd])}") + subprocess.check_call(cmd) + + +def install_without_index(python_exe, packages, wheels): + # Install each of the built wheels without deps or consulting an index. + # This is because we absolutely don't want this falling back to anything + # but what we said. 
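Editor's note: `list_gh_artifacts` is wrapped in `functools.lru_cache`, presumably because `find_wheel` may fall back to fetching once per requested package (shark-ai, shortfin, sharktank) while the artifact listing for a given run only needs to hit the GitHub API once. A toy illustration of the memoization; the function body here is a stand-in, not the real `gh api` call:

```python
import functools

calls = 0

@functools.lru_cache
def list_artifacts(run_id: str) -> dict:
    # Stand-in for the real `gh api .../actions/runs/{run_id}/artifacts` request.
    global calls
    calls += 1
    return {"linux_x86_64_dev_packages": f"/actions/runs/{run_id}/artifacts"}

for _ in ("shark-ai", "shortfin", "sharktank"):  # one lookup attempt per package
    list_artifacts("12056182052")

print(calls)  # 1 -- the expensive listing runs only once per run id
```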
+ for artifact_path, package_name in wheels: + cmd = [ + "uv", + "pip", + "install", + "--no-deps", + "--no-index", + "-f", + str(artifact_path), + "--force-reinstall", + package_name, + "--python", + str(python_exe), + ] + print(f"\nRunning command: {' '.join([str(c) for c in cmd])}") + subprocess.check_call(cmd) + + # Install requirements for the requested packages. + # Note that not all of these are included in the package dependencies, but + # developers usually want the test requirements too. + requirements_files = [] + if "sharktank" in packages: + requirements_files.append("sharktank/requirements.txt") + requirements_files.append("sharktank/requirements-tests.txt") + if "shortfin" in packages: + requirements_files.append("shortfin/requirements-tests.txt") + + for requirements_file in requirements_files: + cmd = [ + "uv", + "pip", + "install", + "-r", + str(REPO_ROOT / requirements_file), + "--python", + str(python_exe), + ] + print(f"\nRunning command: {' '.join([str(c) for c in cmd])}") + subprocess.check_call(cmd) + + +def find_wheel(args, artifact_prefix: str, package_name: str) -> Tuple[Path, str]: + artifact_path = Path(args.artifact_path) + + def has_package(): + norm_package_name = package_name.replace("-", "_") + pattern = str(artifact_path / f"{norm_package_name}-*.whl") + files = glob(pattern) + return bool(files) + + if has_package(): + return (artifact_path, package_name) + + if not args.fetch_gh_workflow: + raise RuntimeError( + f"Could not find package {package_name} to install from {artifact_path}" + ) + + # Fetch. + artifact_path.mkdir(parents=True, exist_ok=True) + artifact_name = f"{artifact_prefix}_dev_packages" + artifact_file = artifact_path / f"{artifact_name}.zip" + if not artifact_file.exists(): + print(f"Package {package_name} not found. Fetching from {artifact_name}...") + artifacts = list_gh_artifacts(args.fetch_gh_workflow) + if artifact_name not in artifacts: + raise RuntimeError( + f"Could not find required artifact {artifact_name} in run {args.fetch_gh_workflow}" + ) + fetch_gh_artifact(artifacts[artifact_name], artifact_file) + print(f"Extracting {artifact_file}") + with zipfile.ZipFile(artifact_file) as zip_ref: + zip_ref.extractall(artifact_path) + + # Try again. + if not has_package(): + raise RuntimeError(f"Could not find {package_name} in {artifact_path}") + return (artifact_path, package_name) + + +def main(args): + # Look up the workflow run for a ref. + if args.fetch_git_ref: + latest_gh_workflow = get_latest_workflow_run_id_for_ref(args.fetch_git_ref) + args.fetch_git_ref = "" + args.fetch_gh_workflow = str(latest_gh_workflow) + return main(args) + + # Make sure we have an artifact path if fetching. + if not args.artifact_path and args.fetch_gh_workflow: + with tempfile.TemporaryDirectory() as td: + args.artifact_path = td + return main(args) + + # Parse command-delimited list of packages from args. + packages = args.packages.split(",") + print("Installing packages:", packages) + + artifact_prefix = f"{platform.system().lower()}_{platform.machine()}" + wheels = [] + for package_name in packages: + wheels.append(find_wheel(args, artifact_prefix, package_name)) + print("Installing wheels:", wheels) + + # Set up venv using 'uv' (https://docs.astral.sh/uv/). + # We could use 'pip', but 'uv' is much faster at installing packages. 
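Editor's note: `find_wheel` globs for `<package>-*.whl` only after replacing dashes with underscores, because wheel filenames use the underscore-normalized project name even when the distribution name contains dashes (`shark-ai` builds `shark_ai-...whl`). A quick check of that matching logic; the wheel filename below is made up:

```python
from fnmatch import fnmatch

# Hypothetical wheel filename; dashes in the project name become underscores.
wheel = "shark_ai-3.1.0.dev0+51feda808-py3-none-any.whl"
package_name = "shark-ai"

pattern = f"{package_name.replace('-', '_')}-*.whl"
print(fnmatch(wheel, pattern))                   # True
print(fnmatch(wheel, f"{package_name}-*.whl"))   # False -- unnormalized name misses
```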
+ venv_path = args.venv_dir + python_exe = find_venv_python(venv_path) + + if not python_exe: + print(f"Creating venv at {str(venv_path)}") + + subprocess.check_call([sys.executable, "-m", "pip", "install", "uv"]) + subprocess.check_call(["uv", "venv", str(venv_path), "--python", "3.11"]) + python_exe = find_venv_python(venv_path) + if not python_exe: + raise RuntimeError("Error creating venv") + + # Install the PyTorch CPU wheels first to save multiple minutes and a lot of bandwidth. + cmd = [ + "uv", + "pip", + "install", + "-r", + str(REPO_ROOT / "pytorch-cpu-requirements.txt"), + "--python", + str(python_exe), + ] + print(f"\nRunning command: {' '.join([str(c) for c in cmd])}") + subprocess.check_call(cmd) + + if args.install_using_index: + install_with_index(python_exe, wheels) + else: + install_without_index(python_exe, packages, wheels) + + # Log which packages are installed. + print("") + print(f"Checking packages with 'uv pip freeze':") + subprocess.check_call( + [ + "uv", + "pip", + "freeze", + "--python", + str(python_exe), + ] + ) + + print("") + print(f"venv setup using uv, activate with:\n source {venv_path}/bin/activate") + + return 0 + + +if __name__ == "__main__": + sys.exit(main(parse_arguments())) diff --git a/pytorch-cpu-requirements.txt b/pytorch-cpu-requirements.txt index aae0297db..bf9fbd886 100644 --- a/pytorch-cpu-requirements.txt +++ b/pytorch-cpu-requirements.txt @@ -1,3 +1,2 @@ ---pre --index-url https://download.pytorch.org/whl/test/cpu torch==2.3.0 diff --git a/shortfin/build_tools/build_linux_package.sh b/shortfin/build_tools/build_linux_package.sh index afaa1e9fb..91b944e51 100755 --- a/shortfin/build_tools/build_linux_package.sh +++ b/shortfin/build_tools/build_linux_package.sh @@ -14,9 +14,10 @@ # Build everything (all python versions): # sudo ./build_tools/build_linux_package.sh # -# Build specific Python versions to custom directory: +# Build specific Python versions to custom directory, with tracing enabled: # OVERRIDE_PYTHON_VERSIONS="cp312-cp312 cp313-cp313" \ # OUTPUT_DIR="/tmp/wheelhouse" \ +# SHORTFIN_ENABLE_TRACING="ON" \ # sudo -E ./build_tools/build_linux_package.sh # # Valid Python versions match a subdirectory under /opt/python in the docker @@ -40,6 +41,8 @@ ARCH="$(uname -m)" MANYLINUX_DOCKER_IMAGE="${MANYLINUX_DOCKER_IMAGE:-quay.io/pypa/manylinux_2_28_${ARCH}:latest}" PYTHON_VERSIONS="${OVERRIDE_PYTHON_VERSIONS:-cp311-cp311 cp312-cp312 cp313-cp313}" OUTPUT_DIR="${OUTPUT_DIR:-${THIS_DIR}/wheelhouse}" +CACHE_DIR="${CACHE_DIR:-}" +SHORTFIN_ENABLE_TRACING="${SHORTFIN_ENABLE_TRACING:-ON}" function run_on_host() { echo "Running on host" @@ -50,12 +53,23 @@ function run_on_host() { OUTPUT_DIR="$(cd "${OUTPUT_DIR}" && pwd)" echo "Outputting to ${OUTPUT_DIR}" mkdir -p "${OUTPUT_DIR}" + + # Setup cache as needed. + extra_args="" + if ! [ -z "$CACHE_DIR" ]; then + echo "Setting up host cache dir ${CACHE_DIR}" + mkdir -p "${CACHE_DIR}/ccache" + extra_args="${extra_args} -v ${CACHE_DIR}:${CACHE_DIR} -e CACHE_DIR=${CACHE_DIR}" + fi + docker run --rm \ -v "${REPO_ROOT}:${REPO_ROOT}" \ -v "${OUTPUT_DIR}:${OUTPUT_DIR}" \ -e __MANYLINUX_BUILD_WHEELS_IN_DOCKER=1 \ -e "OVERRIDE_PYTHON_VERSIONS=${PYTHON_VERSIONS}" \ -e "OUTPUT_DIR=${OUTPUT_DIR}" \ + -e "SHORTFIN_ENABLE_TRACING=${SHORTFIN_ENABLE_TRACING}" \ + ${extra_args} \ "${MANYLINUX_DOCKER_IMAGE}" \ -- ${THIS_DIR}/${SCRIPT_NAME} @@ -72,6 +86,23 @@ function run_in_docker() { echo "Using python versions: ${PYTHON_VERSIONS}" local orig_path="${PATH}" + # Configure caching. 
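Editor's note: `main` installs the pinned CPU-only torch wheel from `pytorch-cpu-requirements.txt` (now without `--pre`) before anything else, which, as the deleted workflow noted, saves several minutes and a lot of bandwidth compared with letting a dependency pull in the default CUDA build. If you want to confirm the CPU wheel is what landed in the venv, a quick check along these lines should work; the printed values are what a CPU-only build typically reports:

```python
import torch

print(torch.__version__)          # e.g. "2.3.0+cpu"
print(torch.version.cuda)         # None for CPU-only wheels
print(torch.cuda.is_available())  # False with the CPU-only wheel installed
```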
+ if [ -z "$CACHE_DIR" ]; then + echo "Cache directory not configured. No caching will take place." + else + # TODO: include this in the dockerfile we use so it gets cached + install_ccache + + # TODO: debug low cache hit rate (~30% hits out of 98% cacheable) on CI + mkdir -p "${CACHE_DIR}" + CACHE_DIR="$(cd ${CACHE_DIR} && pwd)" + echo "Caching build artifacts to ${CACHE_DIR}" + export CCACHE_DIR="${CACHE_DIR}/ccache" + export CCACHE_MAXSIZE="2G" + export CMAKE_C_COMPILER_LAUNCHER=ccache + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + fi + # Build phase. echo "******************** BUILDING PACKAGE ********************" for python_version in ${PYTHON_VERSIONS}; do @@ -82,14 +113,44 @@ function run_in_docker() { fi export PATH="${python_dir}/bin:${orig_path}" echo ":::: Python version $(python --version)" + clean_wheels "shortfin" "${python_version}" build_shortfin run_audit_wheel "shortfin" "${python_version}" + + if ! [ -z "$CACHE_DIR" ]; then + echo "ccache stats:" + ccache --show-stats + fi done } +function install_ccache() { + # This gets an old version. + # yum install -y ccache + + CCACHE_VERSION="4.10.2" + + if [[ "${ARCH}" == "x86_64" ]]; then + curl --silent --fail --show-error --location \ + "https://github.com/ccache/ccache/releases/download/v${CCACHE_VERSION}/ccache-${CCACHE_VERSION}-linux-${ARCH}.tar.xz" \ + --output ccache.tar.xz + + tar xf ccache.tar.xz + cp ccache-${CCACHE_VERSION}-linux-${ARCH}/ccache /usr/local/bin + elif [[ "${ARCH}" == "aarch64" ]]; then + # Latest version of ccache is not released for arm64, built it + git clone --depth 1 --branch "v${CCACHE_VERSION}" https://github.com/ccache/ccache.git + mkdir -p ccache/build && cd "$_" + cmake -G "Ninja" -DCMAKE_BUILD_TYPE=Release .. + ninja + cp ccache /usr/bin/ + fi +} + function build_shortfin() { - export SHORTFIN_ENABLE_TRACING=ON + # Note: The SHORTFIN_ENABLE_TRACING environment variable should have been + # forwarded from the host environment into Docker above. python -m pip wheel --disable-pip-version-check -v -w "${OUTPUT_DIR}" "${REPO_ROOT}/shortfin" } diff --git a/shortfin/setup.py b/shortfin/setup.py index e15b38d89..c603470c2 100644 --- a/shortfin/setup.py +++ b/shortfin/setup.py @@ -212,6 +212,7 @@ def build_cmake_configuration(CMAKE_BUILD_DIR: Path, extra_cmake_args=[]): "-DSHORTFIN_BUNDLE_DEPS=ON", f"-DCMAKE_BUILD_TYPE={cfg}", "-DSHORTFIN_BUILD_PYTHON_BINDINGS=ON", + "-DSHORTFIN_BUILD_TESTS=OFF", f"-DPython3_EXECUTABLE={sys.executable}", ] + extra_cmake_args