From e90457674380f931bb95c0350af4ad83af568d72 Mon Sep 17 00:00:00 2001
From: Daniele <36171005+dtrifiro@users.noreply.github.com>
Date: Thu, 8 Aug 2024 21:24:52 +0200
Subject: [PATCH] [CI/Build] Dockerfile.cpu improvements (#7298)

---
 .dockerignore  |  3 +++
 Dockerfile.cpu | 30 +++++++++++++++++++++---------
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index 5cfe0dcb065dc..79fa088fa809c 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1 +1,4 @@
 vllm/*.so
+/.venv
+/build
+dist
diff --git a/Dockerfile.cpu b/Dockerfile.cpu
index 78730f39721cb..35ce5dde99d2a 100644
--- a/Dockerfile.cpu
+++ b/Dockerfile.cpu
@@ -2,14 +2,16 @@
 
 FROM ubuntu:22.04 AS cpu-test-1
 
-RUN apt-get update -y \
-    && apt-get install -y curl git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
+RUN --mount=type=cache,target=/var/cache/apt \
+    apt-get update -y \
+    && apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
     && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
 
 # https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html
 # intel-openmp provides additional performance improvement vs. openmp
 # tcmalloc provides better memory allocation efficiency, e.g, holding memory in caches to speed up access of commonly-used objects.
-RUN pip install intel-openmp
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install intel-openmp
 
 ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so:$LD_PRELOAD"
 
@@ -17,22 +19,32 @@ RUN echo 'ulimit -c 0' >> ~/.bashrc
 
 RUN pip install https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/cpu/intel_extension_for_pytorch-2.4.0%2Bgitfbaa4bc-cp310-cp310-linux_x86_64.whl
 
-RUN pip install --upgrade pip \
-    && pip install wheel packaging ninja "setuptools>=49.4.0" numpy
+ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,src=requirements-build.txt,target=requirements-build.txt \
+    pip install --upgrade pip && \
+    pip install -r requirements-build.txt
 
 FROM cpu-test-1 AS build
 
-COPY ./ /workspace/vllm
-
 WORKDIR /workspace/vllm
 
-RUN pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,src=requirements-common.txt,target=requirements-common.txt \
+    --mount=type=bind,src=requirements-cpu.txt,target=requirements-cpu.txt \
+    pip install -v -r requirements-cpu.txt
+
+COPY ./ ./
 
 # Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
 ARG VLLM_CPU_DISABLE_AVX512
 ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
 
-RUN VLLM_TARGET_DEVICE=cpu python3 setup.py install
+ENV CCACHE_DIR=/root/.cache/ccache
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/ccache \
+    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
+    pip install dist/*.whl
 
 WORKDIR /workspace/
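
The cache and bind mounts introduced above (--mount=type=cache, --mount=type=bind) require Docker BuildKit, which is the default builder in recent Docker releases. A minimal sketch of a build invocation with BuildKit enabled explicitly (the vllm-cpu image tag is illustrative, not taken from the patch):

    # BuildKit is needed for the RUN --mount options used in Dockerfile.cpu
    DOCKER_BUILDKIT=1 docker build -f Dockerfile.cpu -t vllm-cpu .

For hosts without AVX-512, the build arg already documented in the Dockerfile comment can be appended, e.g. --build-arg VLLM_CPU_DISABLE_AVX512="true".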