
Add IPEX Allreduce
bigPYJ1151 committed Jul 12, 2024
1 parent f3fa6b9 commit 4a67fb9
Showing 5 changed files with 11 additions and 7 deletions.
Dockerfile.cpu (2 additions, 3 deletions)
@@ -13,10 +13,9 @@ RUN pip install intel-openmp

ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so:$LD_PRELOAD"


RUN echo 'ulimit -c 0' >> ~/.bashrc

RUN pip install https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/cpu/intel_extension_for_pytorch-2.3.100%2Bgit0eb3473-cp310-cp310-linux_x86_64.whl
RUN pip install --proxy http://child-prc.intel.com:913 http://mlpc.intel.com/downloads/cpu/ipex-2.4/rc0/intel_extension_for_pytorch-2.4.0-cp310-cp310-manylinux2014_x86_64.whl

RUN pip install --upgrade pip \
&& pip install wheel packaging ninja "setuptools>=49.4.0" numpy
@@ -27,7 +26,7 @@ COPY ./ /workspace/vllm

WORKDIR /workspace/vllm

RUN pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
RUN pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/test/cpu

# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
ARG VLLM_CPU_DISABLE_AVX512
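As a quick sanity check after building the image, the IPEX wheel installed by the RUN line above should import cleanly and expose the distributed module this commit relies on. A minimal sketch, assuming the wheel really reports a 2.4.x version string:

    import intel_extension_for_pytorch as ipex

    print(ipex.__version__)                         # expected to be a 2.4.x build
    print(hasattr(ipex.distributed, "all_reduce"))  # the API the new CPU branch calls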
docs/source/getting_started/cpu-installation.rst (0 additions, 2 deletions)
@@ -88,8 +88,6 @@ Intel Extension for PyTorch

- `Intel Extension for PyTorch (IPEX) <https://github.com/intel/intel-extension-for-pytorch>`_ extends PyTorch with up-to-date features optimizations for an extra performance boost on Intel hardware.

- IPEX after the ``2.3.0`` can be enabled in the CPU backend by default if it is installed.

.. _cpu_backend_performance_tips:

Performance tips
requirements-cpu.txt (2 additions, 2 deletions)
@@ -2,6 +2,6 @@
-r requirements-common.txt

# Dependencies for x86_64 CPUs
torch == 2.3.1+cpu; platform_machine != "ppc64le"
torchvision == 0.18.1+cpu; platform_machine != "ppc64le" # required for the image processor of phi3v, this must be updated alongside torch
torch == 2.4.0; platform_machine != "ppc64le"
torchvision; platform_machine != "ppc64le" # required for the image processor of phi3v, this must be updated alongside torch
triton >= 2.2.0 # FIXME(woosuk): This is a hack to avoid import error.
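The loosened torchvision pin only works if the resolved build matches torch 2.4.0, and the IPEX wheel from the Dockerfile has to track the same minor version. A small illustrative check (the distribution names used here are assumptions about how the wheels register themselves):

    from importlib.metadata import version

    for pkg in ("torch", "torchvision", "intel_extension_for_pytorch"):
        print(pkg, version(pkg))
    # torch and intel_extension_for_pytorch should agree on 2.4.x, and torchvision
    # must be the release built against that torch (the reason the pin was relaxed).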
vllm/distributed/parallel_state.py (3 additions, 0 deletions)
@@ -288,6 +288,9 @@ def all_reduce(self, input_: torch.Tensor) -> torch.Tensor:
pynccl_comm = self.pynccl_comm
if (pynccl_comm is not None and not pynccl_comm.disabled):
pynccl_comm.all_reduce(input_)
elif input_.is_cpu:
import intel_extension_for_pytorch as ipex
ipex.distributed.all_reduce(input_, group=self.device_group)
else:
torch.distributed.all_reduce(input_, group=self.device_group)
return input_
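For readers skimming the hunk above: the new branch routes CPU tensors through IPEX's all-reduce, while CUDA tensors keep using pynccl or torch.distributed. A stripped-down sketch of just that dispatch (the helper name and gloo-style process group are illustrative, not vLLM code):

    import torch
    import torch.distributed as dist

    def cpu_aware_all_reduce(t: torch.Tensor, group: dist.ProcessGroup) -> torch.Tensor:
        if t.is_cpu:
            # Imported lazily, as in the diff, so GPU-only deployments never need IPEX.
            import intel_extension_for_pytorch as ipex
            ipex.distributed.all_reduce(t, group=group)  # in-place, like dist.all_reduce
        else:
            dist.all_reduce(t, group=group)
        return t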
vllm/executor/cpu_executor.py (4 additions, 0 deletions)
@@ -48,6 +48,10 @@ def _init_executor(self) -> None:
os.environ['KMP_PLAIN_BARRIER_PATTERN'] = "dist,dist"
os.environ['KMP_REDUCTION_BARRIER_PATTERN'] = "dist,dist"

# To hint IPEX uses shared memory based AllReduce
os.environ["LOCAL_WORLD_SIZE"] = str(
self.parallel_config.tensor_parallel_size)

self.model_config = _verify_and_get_model_config(self.model_config)
self.cache_config = _verify_and_get_cache_config(self.cache_config)
self.scheduler_config = _verify_and_get_scheduler_config(
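The executor change above is only an environment hint: setting LOCAL_WORLD_SIZE to the tensor-parallel size lets IPEX pick its shared-memory AllReduce path, presumably because it can then tell that all ranks live on one node, so the variable has to be set before the workers initialize their process groups. A hypothetical standalone equivalent:

    import os

    def hint_ipex_shm_allreduce(tensor_parallel_size: int) -> None:
        # Set in the driver process before worker processes are spawned/initialized.
        os.environ["LOCAL_WORLD_SIZE"] = str(tensor_parallel_size)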
