vllm/config/parallel.py: 7 changes (5 additions & 2 deletions)
@@ -68,6 +68,8 @@ class ParallelConfig:
     """Number of pipeline parallel groups."""
     tensor_parallel_size: int = 1
     """Number of tensor parallel groups."""
+    context_parallel_size: int = 1
+    """Number of context parallel groups."""
     data_parallel_size: int = 1
     """Number of data parallel groups. MoE layers will be sharded according to
     the product of the tensor parallel size and data parallel size."""
@@ -185,7 +187,7 @@ class is dynamically inherited by the worker class. This is used to inject
     calls."""

     world_size: int = field(init=False)
-    """world_size is TPxPP, it affects the number of workers we create."""
+    """world_size is TPxCPxPP, it affects the number of workers we create."""

     rank: int = 0
     """Global rank in distributed setup."""
@@ -335,6 +337,7 @@ def compute_hash(self):
         factors: list[Any] = []
         factors.append(self.pipeline_parallel_size)
         factors.append(self.tensor_parallel_size)
+        factors.append(self.context_parallel_size)
         factors.append(self.enable_expert_parallel)
         factors.append(self.data_parallel_size)
         factors.append(envs.VLLM_ALL2ALL_BACKEND)
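Appending context_parallel_size to the factors list means two configurations that differ only in context parallel size now produce different hashes, so anything keyed on this hash cannot be shared between them by accident. A minimal sketch of the idea, assuming the factors are ultimately digested roughly as below (the real compute_hash combines more fields than shown here):

```python
# Sketch only: illustrates why context_parallel_size belongs in the hash
# factors. The real ParallelConfig.compute_hash includes more fields.
import hashlib
from typing import Any


def compute_hash_sketch(pipeline_parallel_size: int,
                        tensor_parallel_size: int,
                        context_parallel_size: int) -> str:
    factors: list[Any] = [
        pipeline_parallel_size,
        tensor_parallel_size,
        context_parallel_size,  # newly appended factor in this diff
    ]
    return hashlib.sha256(str(factors).encode()).hexdigest()


# Setups differing only in context parallel size now hash differently.
assert compute_hash_sketch(1, 4, 1) != compute_hash_sketch(1, 4, 2)
```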
@@ -374,7 +377,7 @@ def __post_init__(self) -> None:

         # Continue with the rest of the initialization
         self.world_size = self.pipeline_parallel_size * \
-            self.tensor_parallel_size
+            self.tensor_parallel_size * self.context_parallel_size

         if self.distributed_executor_backend == "external_launcher":
             logger.info("Using external launcher for distributed inference.")