From e1152b1e0703c693a0fdac9d63bb42efb93e3db4 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 7 Feb 2024 14:42:02 -0800 Subject: [PATCH] Add documentation on how to do incremental builds (#2796) --- docs/source/getting_started/installation.rst | 10 ++++++++++ setup.py | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/docs/source/getting_started/installation.rst b/docs/source/getting_started/installation.rst index 911c3d8f9a4a..77b0ae65838a 100644 --- a/docs/source/getting_started/installation.rst +++ b/docs/source/getting_started/installation.rst @@ -67,3 +67,13 @@ You can also build and install vLLM from source: $ # Use `--ipc=host` to make sure the shared memory is large enough. $ docker run --gpus all -it --rm --ipc=host nvcr.io/nvidia/pytorch:23.10-py3 + +.. note:: + If you are developing the C++ backend of vLLM, consider building vLLM with + + .. code-block:: console + + $ python setup.py develop + + since it will give you incremental builds. The downside is that this method + is `deprecated by setuptools <https://github.com/pypa/setuptools/issues/917>`_. diff --git a/setup.py b/setup.py index 9cc4aea0ea75..60efed0720ff 100644 --- a/setup.py +++ b/setup.py @@ -15,6 +15,11 @@ ROOT_DIR = os.path.dirname(__file__) +# If you are developing the C++ backend of vLLM, consider building vLLM with +# `python setup.py develop` since it will give you incremental builds. +# The downside is that this method is deprecated, see +# https://github.com/pypa/setuptools/issues/917 + MAIN_CUDA_VERSION = "12.1" # Supported NVIDIA GPU architectures.