Skip to content

Commit 40f896e

Browse files
committed
refactor attention backend for perf boost
Signed-off-by: ganyi <ygan@amd.com>
1 parent 4e68cc9 commit 40f896e

File tree

6 files changed

+763
-297
lines changed

6 files changed

+763
-297
lines changed

vllm/config/scheduler.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,10 @@ class SchedulerConfig:
137137
structured outputs, speculative decoding, and pipeline parallelism.
138138
"""
139139

140+
split_prefill_from_chunk: bool = False
141+
"""Whether to split the prefill request into pure prefill and chunked
142+
prefill in a single batch."""
143+
140144
def compute_hash(self) -> str:
141145
"""
142146
WARNING: Whenever a new field is added to this config,

vllm/platforms/rocm.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,11 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
391391
):
392392
compilation_config.custom_ops.append("+rms_norm")
393393

394+
if envs.VLLM_ROCM_USE_AITER and envs.VLLM_ROCM_USE_AITER_MHA:
395+
# Enable request reordering when the AITER MHA backend
396+
# is used for attention computation.
397+
vllm_config.scheduler_config.split_prefill_from_chunk = True
398+
394399
@classmethod
395400
def verify_model_arch(cls, model_arch: str) -> None:
396401
if model_arch in _ROCM_UNSUPPORTED_MODELS:

0 commit comments

Comments
 (0)