From 0f46702de35c2295393f008b977c8a3829ef13f7 Mon Sep 17 00:00:00 2001
From: Amrit Krishnan
Date: Thu, 13 Nov 2025 12:27:00 -0500
Subject: [PATCH 1/4] Fix disk space blowup by adding cache clean

---
 .github/workflows/docker.yml | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index b31152e..3182f9a 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -22,8 +22,7 @@ jobs:
   push_to_registry:
     name: Push Docker image to Docker Hub
     runs-on:
-      - self-hosted
-      - docker
+      - ubuntu-latest
     steps:
       - name: Checkout repository
         uses: actions/checkout@v5.0.0
@@ -34,6 +33,17 @@ jobs:
           VERSION=$(grep -A 1 'name = "vllm"' uv.lock | grep version | cut -d '"' -f 2)
           echo "version=$VERSION" >> $GITHUB_OUTPUT
 
+      - name: Free up disk space
+        run: |
+          echo "Disk space before cleanup:"
+          df -h
+          # Remove Docker cache and unused images
+          docker system prune -af --volumes || true
+          # Clean buildx cache
+          docker buildx prune -af || true
+          echo "Disk space after cleanup:"
+          df -h
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
@@ -59,3 +69,14 @@ jobs:
             ${{ steps.meta.outputs.tags }}
             vectorinstitute/vector-inference:${{ steps.vllm-version.outputs.version }}
           labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=registry,ref=vectorinstitute/vector-inference:buildcache
+          cache-to: type=registry,ref=vectorinstitute/vector-inference:buildcache,mode=min
+
+      - name: Clean up after build
+        if: always()
+        run: |
+          echo "Cleaning up build artifacts..."
+          docker system prune -af || true
+          docker buildx prune -af || true
+          echo "Final disk space:"
+          df -h

From 6d102103c2935128ca07c722c6f28c4eb08f7d24 Mon Sep 17 00:00:00 2001
From: Amrit Krishnan
Date: Thu, 13 Nov 2025 12:30:43 -0500
Subject: [PATCH 2/4] Add trigger for docker workflow to f/sglang-support

---
 .github/workflows/docker.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 3182f9a..f6d4237 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -13,6 +13,7 @@ on:
   pull_request:
     branches:
       - main
+      - f/sglang-support
     paths:
       - Dockerfile
       - .github/workflows/docker.yml

From cd2ca5e88ca3caa4225356b3acae89dbc7e08ae8 Mon Sep 17 00:00:00 2001
From: Amrit Krishnan
Date: Thu, 13 Nov 2025 12:42:55 -0500
Subject: [PATCH 3/4] Fixes, it seems to be uv cache related

---
 .github/workflows/docker.yml | 22 ----------------------
 Dockerfile                   |  4 +++-
 2 files changed, 3 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index f6d4237..c3cfd4c 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -34,17 +34,6 @@ jobs:
           VERSION=$(grep -A 1 'name = "vllm"' uv.lock | grep version | cut -d '"' -f 2)
           echo "version=$VERSION" >> $GITHUB_OUTPUT
 
-      - name: Free up disk space
-        run: |
-          echo "Disk space before cleanup:"
-          df -h
-          # Remove Docker cache and unused images
-          docker system prune -af --volumes || true
-          # Clean buildx cache
-          docker buildx prune -af || true
-          echo "Disk space after cleanup:"
-          df -h
-
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
@@ -70,14 +59,3 @@ jobs:
             ${{ steps.meta.outputs.tags }}
             vectorinstitute/vector-inference:${{ steps.vllm-version.outputs.version }}
           labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=registry,ref=vectorinstitute/vector-inference:buildcache
-          cache-to: type=registry,ref=vectorinstitute/vector-inference:buildcache,mode=min
-
-      - name: Clean up after build
-        if: always()
-        run: |
-          echo "Cleaning up build artifacts..."
-          docker system prune -af || true
-          docker buildx prune -af || true
-          echo "Final disk space:"
-          df -h
diff --git a/Dockerfile b/Dockerfile
index 7202091..d15fbf8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -57,7 +57,9 @@ WORKDIR /vec-inf
 COPY . /vec-inf
 
 # Install project dependencies with build requirements
-RUN uv pip install --system -e .[dev] --prerelease=allow
+# Use --no-cache to prevent uv from storing both downloaded and extracted packages
+RUN uv pip install --system -e .[dev] --prerelease=allow --no-cache && \
+    rm -rf /root/.cache/uv /tmp/*
 
 # Install a single, system NCCL (from NVIDIA CUDA repo in base image)
 RUN apt-get update && apt-get install -y --allow-change-held-packages\

From de83788e3956417c7b9910e46b1cb116c8909f8a Mon Sep 17 00:00:00 2001
From: Amrit Krishnan
Date: Thu, 13 Nov 2025 12:54:54 -0500
Subject: [PATCH 4/4] Try again with some aggressive pre cleanup

---
 .github/workflows/docker.yml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index c3cfd4c..597003c 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -34,6 +34,24 @@ jobs:
           VERSION=$(grep -A 1 'name = "vllm"' uv.lock | grep version | cut -d '"' -f 2)
           echo "version=$VERSION" >> $GITHUB_OUTPUT
 
+      - name: Maximize build space
+        run: |
+          echo "Disk space before cleanup:"
+          df -h
+          # Remove unnecessary pre-installed software
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo rm -rf /usr/local/share/boost
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+          # Clean apt cache
+          sudo apt-get clean
+          # Remove docker images
+          docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
+          echo "Disk space after cleanup:"
+          df -h
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 