4 changes: 4 additions & 0 deletions vllm/platforms/cpu.py
@@ -180,3 +180,7 @@ def get_device_communicator_cls(cls) -> str:
         Get device specific communicator class for distributed communication.
         """
         return "vllm.distributed.device_communicators.cpu_communicator.CpuCommunicator"  # noqa
+
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        return True
4 changes: 4 additions & 0 deletions vllm/platforms/cuda.py
@@ -308,6 +308,10 @@ def supports_fp8(cls) -> bool:
     def supports_v1(cls, model_config: ModelConfig) -> bool:
         return True
 
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        return True
+
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         return True
4 changes: 4 additions & 0 deletions vllm/platforms/hpu.py
@@ -92,3 +92,7 @@ def get_punica_wrapper(cls) -> str:
     @classmethod
     def get_device_communicator_cls(cls) -> str:
         return "vllm.distributed.device_communicators.hpu_communicator.HpuCommunicator"  # noqa
+
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        return True
7 changes: 7 additions & 0 deletions vllm/platforms/interface.py
@@ -379,6 +379,13 @@ def supports_v1(cls, model_config: ModelConfig) -> bool:
         """
         return False
 
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        """
+        Returns whether the current platform can support structured output.
+        """
+        return False
+
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         """
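The hook defaults to False on the base Platform class, so each backend must opt in explicitly. For illustration only (MyAcceleratorPlatform is hypothetical and not part of this PR), an out-of-tree platform plugin would override it like any other Platform classmethod:

# Hypothetical out-of-tree platform (illustrative sketch, not from this PR).
# Subclasses inherit the base-class default of False unless they override it.
from vllm.platforms.interface import Platform


class MyAcceleratorPlatform(Platform):

    @classmethod
    def supports_structured_output(cls) -> bool:
        # Opt in once the backend can actually enforce guided decoding.
        return True


assert Platform.supports_structured_output() is False
assert MyAcceleratorPlatform.supports_structured_output() is True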
4 changes: 4 additions & 0 deletions vllm/platforms/neuron.py
@@ -67,3 +67,7 @@ def get_device_communicator_cls(cls) -> str:
     @classmethod
     def use_all_gather(cls) -> bool:
         return True
+
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        return True
4 changes: 4 additions & 0 deletions vllm/platforms/rocm.py
@@ -303,6 +303,10 @@ def supports_v1(cls, model_config: ModelConfig) -> bool:
         # V1 support on AMD gpus is experimental
         return True
 
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        return True
+
     @classmethod
     def use_custom_allreduce(cls) -> bool:
         # We only enable custom allreduce for MI300 series
5 changes: 5 additions & 0 deletions vllm/platforms/tpu.py
@@ -133,3 +133,8 @@ def use_all_gather(cls) -> bool:
     def supports_v1(cls, model_config: ModelConfig) -> bool:
         # V1 support on TPU is experimental
         return True
+
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        # Structured output is not supported on TPU.
+        return False
4 changes: 4 additions & 0 deletions vllm/platforms/xpu.py
@@ -140,3 +140,7 @@ def device_support_bf16(cls) -> bool:
     @classmethod
     def get_device_communicator_cls(cls) -> str:
         return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator"  # noqa
+
+    @classmethod
+    def supports_structured_output(cls) -> bool:
+        return True
8 changes: 5 additions & 3 deletions vllm/v1/engine/processor.py
@@ -136,9 +136,11 @@ def _validate_structured_output(self, params: SamplingParams) -> None:
                     f" != {engine_level_backend}")
         else:
             params.guided_decoding.backend = engine_level_backend
-        import vllm.platforms
-        if vllm.platforms.current_platform.is_tpu():
-            raise ValueError("Structured output is not supported on TPU.")
+
+        from vllm.platforms import current_platform
+        if not current_platform.supports_structured_output():
+            raise ValueError("Structured output is not supported on "
+                             f"{current_platform.device_name}.")
 
         # Request content validation
         if engine_level_backend.startswith("xgrammar"):
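The request-time effect: any platform whose Platform subclass keeps the False default is rejected during validation with a device-specific message, replacing the hard-coded TPU check. A hypothetical usage sketch (the model name and TPU host are illustrative assumptions, not from this PR):

# Illustrative sketch, not from this PR: requesting guided decoding on a
# platform where supports_structured_output() returns False (TPU here) is
# rejected while the request is validated.
from vllm import LLM, SamplingParams
from vllm.sampling_params import GuidedDecodingParams

llm = LLM(model="Qwen/Qwen2.5-1.5B-Instruct")  # assumed to run on a TPU host
params = SamplingParams(
    guided_decoding=GuidedDecodingParams(json={"type": "object"}))

# Raises ValueError("Structured output is not supported on <device_name>."),
# where <device_name> is interpolated from current_platform.device_name.
llm.generate("Reply with a JSON object.", params)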