diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py index 89a131e8ea24..21af5eb76ee8 100644 --- a/vllm/compilation/compiler_interface.py +++ b/vllm/compilation/compiler_interface.py @@ -39,7 +39,8 @@ def compute_hash(self, vllm_config: VllmConfig) -> str: Gather all the relevant information from the vLLM config, to compute a hash so that we can cache the compiled model. - See {meth}`VllmConfig.compute_hash` to check what information + See [`VllmConfig.compute_hash`][vllm.config.VllmConfig.compute_hash] + to check what information is already considered by default. This function should only consider the information that is specific to the compiler. """ diff --git a/vllm/config.py b/vllm/config.py index c0671d2524ec..2bba6810d714 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -2980,7 +2980,7 @@ class PoolerConfig: pooling_type: Optional[str] = None """ The pooling method of the pooling model. This should be a key in - {class}`vllm.model_executor.layers.pooler.PoolingType`. + [`vllm.model_executor.layers.pooler.PoolingType`][]. """ normalize: Optional[bool] = None @@ -3691,23 +3691,27 @@ class CompilationConfig: """Configuration for compilation. It has three parts: - Top-level Compilation control: - - {attr}`level` - - {attr}`debug_dump_path` - - {attr}`cache_dir` - - {attr}`backend` - - {attr}`custom_ops` - - {attr}`splitting_ops` + - [`level`][vllm.config.CompilationConfig.level] + - [`debug_dump_path`][vllm.config.CompilationConfig.debug_dump_path] + - [`cache_dir`][vllm.config.CompilationConfig.cache_dir] + - [`backend`][vllm.config.CompilationConfig.backend] + - [`custom_ops`][vllm.config.CompilationConfig.custom_ops] + - [`splitting_ops`][vllm.config.CompilationConfig.splitting_ops] - CudaGraph capture: - - {attr}`use_cudagraph` - - {attr}`cudagraph_capture_sizes` - - {attr}`cudagraph_num_of_warmups` - - {attr}`cudagraph_copy_inputs` - - {attr}`full_cuda_graph` + - [`use_cudagraph`][vllm.config.CompilationConfig.use_cudagraph] + - [`cudagraph_capture_sizes`] + [vllm.config.CompilationConfig.cudagraph_capture_sizes] + - [`cudagraph_num_of_warmups`] + [vllm.config.CompilationConfig.cudagraph_num_of_warmups] + - [`cudagraph_copy_inputs`] + [vllm.config.CompilationConfig.cudagraph_copy_inputs] + - [`full_cuda_graph`][vllm.config.CompilationConfig.full_cuda_graph] - Inductor compilation: - - {attr}`use_inductor` - - {attr}`compile_sizes` - - {attr}`inductor_compile_config` - - {attr}`inductor_passes` + - [`use_inductor`][vllm.config.CompilationConfig.use_inductor] + - [`compile_sizes`][vllm.config.CompilationConfig.compile_sizes] + - [`inductor_compile_config`] + [vllm.config.CompilationConfig.inductor_compile_config] + - [`inductor_passes`][vllm.config.CompilationConfig.inductor_passes] - custom inductor passes Why we have different sizes for cudagraph and inductor: diff --git a/vllm/connections.py b/vllm/connections.py index 9abc66050e18..84e32a4d5ca9 100644 --- a/vllm/connections.py +++ b/vllm/connections.py @@ -167,4 +167,7 @@ async def async_download_file( global_http_connection = HTTPConnection() -"""The global {class}`HTTPConnection` instance used by vLLM.""" +""" +The global [`HTTPConnection`][vllm.connections.HTTPConnection] instance used +by vLLM. 
+""" diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 56b9e49d24d9..19b219b674f3 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -475,7 +475,8 @@ async def add_request_async( *, inputs: Optional[PromptType] = None, # DEPRECATED ) -> None: - """Async version of {meth}`add_request`.""" + """Async version of + [`add_request`][vllm.engine.llm_engine.LLMEngine.add_request].""" if inputs is not None: prompt = inputs assert prompt is not None and params is not None @@ -582,20 +583,21 @@ async def build_guided_decoding_logits_processor_async( class AsyncLLMEngine(EngineClient): - """An asynchronous wrapper for {class}`LLMEngine`. + """An asynchronous wrapper for [`LLMEngine`][vllm.LLMEngine]. - This class is used to wrap the {class}`LLMEngine` class to make it - asynchronous. It uses asyncio to create a background loop that keeps - processing incoming requests. The {class}`LLMEngine` is kicked by the - generate method when there are requests in the waiting queue. The generate - method yields the outputs from the {class}`LLMEngine` to the caller. + This class is used to wrap the [`LLMEngine`][vllm.LLMEngine] class to + make it asynchronous. It uses asyncio to create a background loop that keeps + processing incoming requests. The [`LLMEngine`][vllm.LLMEngine] is kicked + by the generate method when there are requests in the waiting queue. The + generate method yields the outputs from the [`LLMEngine`][vllm.LLMEngine] + to the caller. Args: log_requests: Whether to log the requests. start_engine_loop: If True, the background task to run the engine will be automatically started in the generate call. - *args: Arguments for {class}`LLMEngine`. - **kwargs: Arguments for {class}`LLMEngine`. + *args: Arguments for [`LLMEngine`][vllm.LLMEngine]. + **kwargs: Arguments for [`LLMEngine`][vllm.LLMEngine]. """ _engine_class: Type[_AsyncLLMEngine] = _AsyncLLMEngine @@ -985,8 +987,9 @@ async def generate( from the LLMEngine to the caller. Args: - prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType` - for more details about the format of each input. + prompt: The prompt to the LLM. See + [`PromptType`][vllm.inputs.PromptType] for more details about + the format of each input. sampling_params: The sampling parameters of the request. request_id: The unique id of the request. lora_request: LoRA request to use for generation, if any. @@ -1003,7 +1006,7 @@ async def generate( Details: - If the engine is not running, start the background loop, which iteratively invokes - {meth}`~vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step` + [`engine_step`][vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step] to process the waiting requests. - Add the request to the engine's `RequestTracker`. On the next background loop, this request will be sent to @@ -1075,8 +1078,9 @@ async def encode( from the LLMEngine to the caller. Args: - prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType` - for more details about the format of each input. + prompt: The prompt to the LLM. See + [`PromptType`][vllm.inputs.PromptType] for more details about + the format of each input. pooling_params: The pooling parameters of the request. request_id: The unique id of the request. lora_request: LoRA request to use for generation, if any. @@ -1089,15 +1093,15 @@ async def encode( for the request. 
Details: - - If the engine is not running, start the background loop, - which iteratively invokes - {meth}`~vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step` - to process the waiting requests. - - Add the request to the engine's `RequestTracker`. - On the next background loop, this request will be sent to - the underlying engine. - Also, a corresponding `AsyncStream` will be created. - - Wait for the request outputs from `AsyncStream` and yield them. + - If the engine is not running, start the background loop, + which iteratively invokes + [`vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step`][] + to process the waiting requests. + - Add the request to the engine's `RequestTracker`. + On the next background loop, this request will be sent to + the underlying engine. + Also, a corresponding `AsyncStream` will be created. + - Wait for the request outputs from `AsyncStream` and yield them. Example: ``` diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 2e5361c4891b..27f3992745fe 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -130,11 +130,11 @@ class LLMEngine: iteration-level scheduling and efficient memory management to maximize the serving throughput. - The [LLM][vllm.LLM] class wraps this class for offline batched inference - and the [AsyncLLMEngine][] class wraps this class for online serving. + The [`LLM`][vllm.LLM] class wraps this class for offline batched inference + and the [`AsyncLLMEngine`][vllm.engine.async_llm_engine.AsyncLLMEngine] + class wraps this class for online serving. - The config arguments are derived from [EngineArgs][vllm.EngineArgs]. (See - [engine-args][]) + The config arguments are derived from [`EngineArgs`][vllm.EngineArgs]. Args: vllm_config: The configuration for initializing and running vLLM. diff --git a/vllm/engine/multiprocessing/client.py b/vllm/engine/multiprocessing/client.py index eea89a9a055f..18b7c187bdff 100644 --- a/vllm/engine/multiprocessing/client.py +++ b/vllm/engine/multiprocessing/client.py @@ -492,8 +492,9 @@ def generate( from the LLMEngine to the caller. Args: - prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType` - for more details about the format of each input. + prompt: The prompt to the LLM. See + [`PromptType`][vllm.inputs.PromptType] for more details about + the format of each input. sampling_params: The sampling parameters of the request. request_id: The unique id of the request. lora_request: LoRA request to use for generation, if any. @@ -561,8 +562,9 @@ def encode( from the LLMEngine to the caller. Args: - prompt: The prompt to the LLM. See {class}`~vllm.inputs.PromptType` - for more details about the format of each input. + prompt: The prompt to the LLM. See + [`PromptType`][vllm.inputs.PromptType] for more details about + the format of each input. pooling_params: The pooling parameters of the request. request_id: The unique id of the request. lora_request: LoRA request to use for generation, if any. diff --git a/vllm/engine/multiprocessing/engine.py b/vllm/engine/multiprocessing/engine.py index ac234d25373d..434cb4985562 100644 --- a/vllm/engine/multiprocessing/engine.py +++ b/vllm/engine/multiprocessing/engine.py @@ -42,19 +42,22 @@ class MQLLMEngine: - """A multiprocessing wrapper for {class}`LLMEngine`. + """A multiprocessing wrapper for + [`LLMEngine`][vllm.engine.llm_engine.LLMEngine]. 
- This class is used to wrap the {class}`LLMEngine` class to enable use + This class is used to wrap the + [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] class to enable use in concurrnet manner. It runs a background loop and uses zeromq to receive new requests and stream outputs incrementally via ipc. - The {class}`LLMEngine` generate or encode process is kicked off when a new - RPCProcessRequest is received by the input_socket. + The [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] generate or encode + process is kicked off when a new RPCProcessRequest is received by the + input_socket. The self.engine_loop checks the input_socket for new requests, adds them to the LLMEngine if there are any, calls the internal - {class}`LLMEngine.step()`, and sends the RequestOutputs back over - the output_socket. + [`LLMEngine.step()`][vllm.engine.llm_engine.LLMEngine.step], and sends + the RequestOutputs back over the output_socket. If use_async_sockets is set, the logic associated with reading new requests from the socket and sending data to the socket is passed @@ -65,8 +68,8 @@ class MQLLMEngine: ipc_path: Base path for zeromq interprocess messaging use_async_sockets: Whether to make send/recv async with GPU log_requests: Whether to log the requests. - *args: Arguments for {class}`LLMEngine`. - **kwargs: Arguments for {class}`LLMEngine`. + *args: Arguments for [`LLMEngine`][vllm.engine.llm_engine.LLMEngine]. + **kwargs: Arguments for [`LLMEngine`][vllm.engine.llm_engine.LLMEngine]. """ def __init__(self, diff --git a/vllm/engine/output_processor/multi_step.py b/vllm/engine/output_processor/multi_step.py index 323580fa7482..110f84a65efc 100644 --- a/vllm/engine/output_processor/multi_step.py +++ b/vllm/engine/output_processor/multi_step.py @@ -56,8 +56,11 @@ def process_prompt_logprob(self, seq_group: SequenceGroup, scheduled computation. Args: - seq_group: the outputs are associated with this {class}`SequenceGroup` - outputs: the {class}`SequenceGroupOutput`s for all scheduler steps + seq_group: the outputs are associated with this + [`SequenceGroup`][vllm.sequence.SequenceGroup] + outputs: the + [`SequenceGroupOutput`][vllm.sequence.SequenceGroupOutput]s + for all scheduler steps """ for output in outputs: # Concatenate single-step prompt logprob processing results. diff --git a/vllm/engine/output_processor/single_step.py b/vllm/engine/output_processor/single_step.py index ea4b71a5b9cd..e88f119c8742 100644 --- a/vllm/engine/output_processor/single_step.py +++ b/vllm/engine/output_processor/single_step.py @@ -19,17 +19,21 @@ def single_step_process_prompt_logprob( sg_output_proc: SequenceGroupOutputProcessor, seq_group: SequenceGroup, output: CompletionSequenceGroupOutput) -> None: - """Process prompt logprobs associated with the {class}`SequenceGroupOutput` - for a given step. + """Process prompt logprobs associated with the + [`SequenceGroupOutput`][vllm.sequence.SequenceGroupOutput] for a given step. Do nothing if the output has no prompt logprobs. Account for the fact that transformers do not compute first-token logprobs. 
Args: - sg_output_proc: {class}`SequenceGroupOutputProcessor` instance - seq_group: the output is associated with this {class}`SequenceGroup` - output: the {class}`SequenceGroupOutput` for a single scheduler step + sg_output_proc: + [`SequenceGroupOutputProcessor`][vllm.engine.output_processor.interfaces.SequenceGroupOutputProcessor] + instance + seq_group: the output is associated with this + [`SequenceGroup`][vllm.sequence.SequenceGroup] + output: the [`SequenceGroupOutput`][vllm.sequence.SequenceGroupOutput] + for a single scheduler step """ prompt_logprobs = output.prompt_logprobs @@ -103,8 +107,11 @@ def process_prompt_logprob(self, seq_group: SequenceGroup, scheduled computation. Args: - seq_group: the output is associated with this {class}`SequenceGroup` - outputs: the {class}`SequenceGroupOutput` for a single scheduler step + seq_group: the output is associated with this + [`SequenceGroup`][vllm.sequence.SequenceGroup] + outputs: the + [`SequenceGroupOutput`][vllm.sequence.SequenceGroupOutput] + for a single scheduler step """ assert len(outputs) == 1, "Single step should only have 1 output." output = outputs[0] diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index f818e1737975..1c7bd65053f9 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -129,8 +129,7 @@ class LLM: compilation_config: Either an integer or a dictionary. If it is an integer, it is used as the level of compilation optimization. If it is a dictionary, it can specify the full compilation configuration. - **kwargs: Arguments for [EngineArgs][vllm.EngineArgs]. (See - [engine-args][]) + **kwargs: Arguments for [`EngineArgs`][vllm.EngineArgs]. Note: This class is intended to be used for offline inference. For online @@ -494,7 +493,7 @@ def collective_rpc(self, `self` argument, in addition to the arguments passed in `args` and `kwargs`. The `self` argument will be the worker object. timeout: Maximum time in seconds to wait for execution. Raises a - {exc}`TimeoutError` on timeout. `None` means wait indefinitely. + [`TimeoutError`][] on timeout. `None` means wait indefinitely. args: Positional arguments to pass to the worker method. kwargs: Keyword arguments to pass to the worker method. diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 93de9f3a5c05..c73575b48d9c 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -582,7 +582,8 @@ def _tokenize_prompt_input( add_special_tokens: bool = True, ) -> TextTokensPrompt: """ - A simpler implementation of {meth}`_tokenize_prompt_input_or_inputs` + A simpler implementation of + [`_tokenize_prompt_input_or_inputs`][vllm.entrypoints.openai.serving_engine.OpenAIServing._tokenize_prompt_input_or_inputs] that assumes single input. """ return next( @@ -603,7 +604,8 @@ def _tokenize_prompt_inputs( add_special_tokens: bool = True, ) -> Iterator[TextTokensPrompt]: """ - A simpler implementation of {meth}`_tokenize_prompt_input_or_inputs` + A simpler implementation of + [`_tokenize_prompt_input_or_inputs`][vllm.entrypoints.openai.serving_engine.OpenAIServing._tokenize_prompt_input_or_inputs] that assumes multiple inputs. 
""" for text in prompt_inputs: diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py index 522bd940211f..40ca1d29939a 100644 --- a/vllm/executor/executor_base.py +++ b/vllm/executor/executor_base.py @@ -74,7 +74,7 @@ def collective_rpc(self, `self` argument, in addition to the arguments passed in `args` and `kwargs`. The `self` argument will be the worker object. timeout: Maximum time in seconds to wait for execution. Raises a - {exc}`TimeoutError` on timeout. `None` means wait indefinitely. + [`TimeoutError`][] on timeout. `None` means wait indefinitely. args: Positional arguments to pass to the worker method. kwargs: Keyword arguments to pass to the worker method. diff --git a/vllm/inputs/__init__.py b/vllm/inputs/__init__.py index 0673aece9108..df4f844cd815 100644 --- a/vllm/inputs/__init__.py +++ b/vllm/inputs/__init__.py @@ -10,8 +10,9 @@ INPUT_REGISTRY = InputRegistry() """ -The global {class}`~InputRegistry` which is used by {class}`~vllm.LLMEngine` -to dispatch data processing according to the target model. +The global [`InputRegistry`][vllm.inputs.registry.InputRegistry] which is used +by [`LLMEngine`][vllm.LLMEngine] to dispatch data processing according to the +target model. """ __all__ = [ diff --git a/vllm/inputs/data.py b/vllm/inputs/data.py index 3b58ec47d5bf..843c45bd6163 100644 --- a/vllm/inputs/data.py +++ b/vllm/inputs/data.py @@ -80,22 +80,24 @@ class EmbedsPrompt(TypedDict): """ Set of possible schemas for a single prompt: -- A text prompt ({class}`str` or {class}`TextPrompt`) -- A tokenized prompt ({class}`TokensPrompt`) -- An embeddings prompt ({class}`EmbedsPrompt`) +- A text prompt ([`str`][] or [`TextPrompt`][vllm.inputs.data.TextPrompt]) +- A tokenized prompt ([`TokensPrompt`][vllm.inputs.data.TokensPrompt]) +- An embeddings prompt ([`EmbedsPrompt`][vllm.inputs.data.EmbedsPrompt]) Note that "singleton" is as opposed to a data structure which encapsulates multiple prompts, i.e. of the sort which may be utilized for encoder/decoder models when the user desires to express both the encoder & decoder -prompts explicitly, i.e. {class}`ExplicitEncoderDecoderPrompt` +prompts explicitly, i.e. +[`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt] -A prompt of type {class}`SingletonPrompt` may be employed -as (1) input to a decoder-only model, (2) input to +A prompt of type [`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] may be +employed as (1) input to a decoder-only model, (2) input to the encoder of an encoder/decoder model, in the scenario where the decoder-prompt is not specified explicitly, or (3) as a member of a larger data structure encapsulating -more than one prompt, i.e. {class}`ExplicitEncoderDecoderPrompt` +more than one prompt, i.e. +[`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt] """ @@ -126,18 +128,20 @@ class ExplicitEncoderDecoderPrompt(TypedDict, Generic[_T1_co, _T2_co]): comprising an explicit encoder prompt and a decoder prompt. The encoder and decoder prompts, respectively, may be formatted - according to any of the {class}`SingletonPrompt` schemas, + according to any of the + [`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] schemas, and are not required to have the same schema. Only the encoder prompt may have multi-modal data. mm_processor_kwargs should be at the top-level, and should not be set in the encoder/decoder prompts, since they are agnostic to the encoder/decoder. 
- Note that an {class}`ExplicitEncoderDecoderPrompt` may not - be used as an input to a decoder-only model, + Note that an + [`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt] + may not be used as an input to a decoder-only model, and that the `encoder_prompt` and `decoder_prompt` fields of this data structure themselves must be - {class}`SingletonPrompt` instances. + [`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] instances. """ encoder_prompt: _T1_co @@ -152,11 +156,11 @@ class ExplicitEncoderDecoderPrompt(TypedDict, Generic[_T1_co, _T2_co]): Set of possible schemas for an LLM input, including both decoder-only and encoder/decoder input types: -- A text prompt ({class}`str` or {class}`TextPrompt`) -- A tokenized prompt ({class}`TokensPrompt`) -- An embeddings prompt ({class}`EmbedsPrompt`) +- A text prompt ([`str`][] or [`TextPrompt`][vllm.inputs.data.TextPrompt]) +- A tokenized prompt ([`TokensPrompt`][vllm.inputs.data.TokensPrompt]) +- An embeddings prompt ([`EmbedsPrompt`][vllm.inputs.data.EmbedsPrompt]) - A single data structure containing both an encoder and a decoder prompt - ({class}`ExplicitEncoderDecoderPrompt`) + ([`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt]) """ @@ -189,7 +193,8 @@ def token_inputs( prompt: Optional[str] = None, cache_salt: Optional[str] = None, ) -> TokenInputs: - """Construct {class}`TokenInputs` from optional values.""" + """Construct [`TokenInputs`][vllm.inputs.data.TokenInputs] from optional + values.""" inputs = TokenInputs(type="token", prompt_token_ids=prompt_token_ids) if prompt is not None: @@ -221,7 +226,8 @@ def embeds_inputs( prompt_embeds: torch.Tensor, cache_salt: Optional[str] = None, ) -> EmbedsInputs: - """Construct :class:`EmbedsInputs` from optional values.""" + """Construct [`EmbedsInputs`][vllm.inputs.data.EmbedsInputs] from optional + values.""" inputs = EmbedsInputs(type="embeds", prompt_embeds=prompt_embeds) if cache_salt is not None: @@ -232,7 +238,7 @@ def embeds_inputs( DecoderOnlyInputs = Union[TokenInputs, EmbedsInputs, "MultiModalInputs"] """ -The inputs in {class}`~vllm.LLMEngine` before they are +The inputs in [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] before they are passed to the model executor. This specifies the data required for decoder-only models. """ @@ -240,11 +246,12 @@ def embeds_inputs( class EncoderDecoderInputs(TypedDict): """ - The inputs in {class}`~vllm.LLMEngine` before they are - passed to the model executor. + The inputs in [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] before they + are passed to the model executor. This specifies the required data for encoder-decoder models. """ + encoder: Union[TokenInputs, "MultiModalInputs"] """The inputs for the encoder portion.""" @@ -254,13 +261,13 @@ class EncoderDecoderInputs(TypedDict): SingletonInputs = Union[TokenInputs, EmbedsInputs, "MultiModalInputs"] """ -A processed {class}`SingletonPrompt` which can be passed to -{class}`vllm.sequence.Sequence`. +A processed [`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] which can be +passed to [`vllm.sequence.Sequence`][]. """ ProcessorInputs = Union[DecoderOnlyInputs, EncoderDecoderInputs] """ -The inputs to {data}`vllm.inputs.InputProcessor`. +The outputs from [`vllm.inputs.preprocess.InputPreprocessor`][]. 
""" _T1 = TypeVar("_T1", bound=SingletonPrompt, default=SingletonPrompt) @@ -277,7 +284,8 @@ def build_explicit_enc_dec_prompt( return ExplicitEncoderDecoderPrompt( encoder_prompt=encoder_prompt, decoder_prompt=decoder_prompt, - mm_processor_kwargs=mm_processor_kwargs) + mm_processor_kwargs=mm_processor_kwargs, + ) def zip_enc_dec_prompts( @@ -288,7 +296,8 @@ def zip_enc_dec_prompts( ) -> list[ExplicitEncoderDecoderPrompt[_T1, _T2]]: """ Zip encoder and decoder prompts together into a list of - {class}`ExplicitEncoderDecoderPrompt` instances. + [`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt] + instances. ``mm_processor_kwargs`` may also be provided; if a dict is passed, the same dictionary will be used for every encoder/decoder prompt. If an iterable is @@ -299,10 +308,11 @@ def zip_enc_dec_prompts( if isinstance(mm_processor_kwargs, dict): return [ build_explicit_enc_dec_prompt( - encoder_prompt, decoder_prompt, - cast(dict[str, Any], mm_processor_kwargs)) - for (encoder_prompt, - decoder_prompt) in zip(enc_prompts, dec_prompts) + encoder_prompt, + decoder_prompt, + cast(dict[str, Any], mm_processor_kwargs), + ) for (encoder_prompt, + decoder_prompt) in zip(enc_prompts, dec_prompts) ] return [ build_explicit_enc_dec_prompt(encoder_prompt, decoder_prompt, diff --git a/vllm/inputs/parse.py b/vllm/inputs/parse.py index d17122b48344..4c64a41ace31 100644 --- a/vllm/inputs/parse.py +++ b/vllm/inputs/parse.py @@ -23,13 +23,13 @@ class ParsedTokens(TypedDict): @overload def parse_and_batch_prompt( - prompt: Union[str, list[str]]) -> Sequence[ParsedText]: + prompt: Union[str, list[str]], ) -> Sequence[ParsedText]: ... @overload def parse_and_batch_prompt( - prompt: Union[list[int], list[list[int]]]) -> Sequence[ParsedTokens]: + prompt: Union[list[int], list[list[int]]], ) -> Sequence[ParsedTokens]: ... @@ -86,7 +86,7 @@ class ParsedTokensPrompt(TypedDict): class ParsedEmbedsPrompt(TypedDict): - type: Literal['embeds'] + type: Literal["embeds"] content: EmbedsPrompt @@ -133,7 +133,7 @@ def parse_singleton_prompt(prompt: SingletonPrompt) -> ParsedSingletonPrompt: def is_explicit_encoder_decoder_prompt( - prompt: PromptType) -> TypeIs[ExplicitEncoderDecoderPrompt]: + prompt: PromptType, ) -> TypeIs[ExplicitEncoderDecoderPrompt]: return isinstance(prompt, dict) and "encoder_prompt" in prompt diff --git a/vllm/inputs/preprocess.py b/vllm/inputs/preprocess.py index 6e8effd60274..b9acabeabd8d 100644 --- a/vllm/inputs/preprocess.py +++ b/vllm/inputs/preprocess.py @@ -67,11 +67,11 @@ def get_eos_token_id(self, return self.tokenizer.get_lora_tokenizer(lora_request).eos_token_id def get_decoder_start_token_id(self) -> Optional[int]: - ''' + """ Obtain the decoder start token id employed by an encoder/decoder model. Returns None for non-encoder/decoder models or if the model config is unavailable. 
- ''' + """ if not self.model_config.is_encoder_decoder: logger.warning_once( @@ -79,14 +79,14 @@ def get_decoder_start_token_id(self) -> Optional[int]: "this is not an encoder/decoder model.") return None - if (self.model_config is None or self.model_config.hf_config is None): + if self.model_config is None or self.model_config.hf_config is None: logger.warning_once( "Using None for decoder start token id because " "model config is not available.") return None dec_start_token_id = getattr(self.model_config.hf_config, - 'decoder_start_token_id', None) + "decoder_start_token_id", None) if dec_start_token_id is None: logger.warning_once( "Falling back on for decoder start token " @@ -97,7 +97,7 @@ def get_decoder_start_token_id(self) -> Optional[int]: return dec_start_token_id def _get_default_enc_dec_decoder_prompt(self) -> list[int]: - ''' + """ Specifically for encoder/decoder models: generate a default decoder prompt for when the user specifies only the encoder prompt. @@ -126,7 +126,7 @@ def _get_default_enc_dec_decoder_prompt(self) -> list[int]: Returns: * prompt_token_ids - ''' + """ bos_token_id = self.get_bos_token_id() assert bos_token_id is not None @@ -224,7 +224,10 @@ async def _tokenize_prompt_async( lora_request: Optional[LoRARequest], tokenization_kwargs: Optional[dict[str, Any]] = None, ) -> list[int]: - """Async version of {meth}`_tokenize_prompt`.""" + """ + Async version of + [`_tokenize_prompt`][vllm.inputs.preprocess.InputPreprocessor._tokenize_prompt]. + """ tokenizer = self.get_tokenizer_group() tokenization_kwargs = self._get_tokenization_kw(tokenization_kwargs) @@ -287,7 +290,10 @@ async def _process_multimodal_async( lora_request: Optional[LoRARequest], return_mm_hashes: bool = False, ) -> MultiModalInputs: - """Async version of {meth}`_process_multimodal`.""" + """ + Async version of + [`_process_multimodal`][vllm.inputs.preprocess.InputPreprocessor._process_multimodal]. + """ tokenizer = await self._get_mm_tokenizer_async(lora_request) mm_processor = self.mm_registry.create_processor(self.model_config, @@ -472,7 +478,7 @@ def _prompt_to_llm_inputs( Returns: - * {class}`SingletonInputs` instance + * [`SingletonInputs`][vllm.inputs.data.SingletonInputs] instance """ parsed = parse_singleton_prompt(prompt) @@ -508,7 +514,10 @@ async def _prompt_to_llm_inputs_async( lora_request: Optional[LoRARequest] = None, return_mm_hashes: bool = False, ) -> SingletonInputs: - """Async version of {meth}`_prompt_to_llm_inputs`.""" + """ + Async version of + [`_prompt_to_llm_inputs`][vllm.inputs.preprocess.InputPreprocessor._prompt_to_llm_inputs]. + """ parsed = parse_singleton_prompt(prompt) if parsed["type"] == "embeds": @@ -644,7 +653,9 @@ def _process_encoder_decoder_prompt( ) -> EncoderDecoderInputs: """ For encoder/decoder models only: - Process an input prompt into an {class}`EncoderDecoderInputs` instance. + Process an input prompt into an + [`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs] + instance. 
There are two types of input prompts: singleton prompts which carry only the @@ -670,7 +681,8 @@ def _process_encoder_decoder_prompt( Returns: - * {class}`EncoderDecoderInputs` instance + * [`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs] + instance """ encoder_inputs: SingletonInputs decoder_inputs: Optional[SingletonInputs] @@ -710,7 +722,10 @@ async def _process_encoder_decoder_prompt_async( prompt: PromptType, tokenization_kwargs: Optional[dict[str, Any]] = None, ) -> EncoderDecoderInputs: - """Async version of {meth}`_process_encoder_decoder_prompt`.""" + """ + Async version of + [`_process_encoder_decoder_prompt`][vllm.inputs.preprocess.InputPreprocessor._process_encoder_decoder_prompt]. + """ encoder_inputs: SingletonInputs decoder_inputs: Optional[SingletonInputs] @@ -778,7 +793,8 @@ def _process_decoder_only_prompt( ) -> DecoderOnlyInputs: """ For decoder-only models: - Process an input prompt into an {class}`DecoderOnlyInputs` instance. + Process an input prompt into a + [`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] instance. Arguments: @@ -789,7 +805,7 @@ def _process_decoder_only_prompt( Returns: - * {class}`DecoderOnlyInputs` instance + * [`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] instance """ prompt_comps = self._prompt_to_llm_inputs( @@ -812,7 +828,10 @@ async def _process_decoder_only_prompt_async( prompt_adapter_request: Optional[PromptAdapterRequest] = None, return_mm_hashes: bool = False, ) -> DecoderOnlyInputs: - """Async version of {meth}`_process_decoder_only_prompt`.""" + """ + Async version of + [`_process_decoder_only_prompt`][vllm.inputs.preprocess.InputPreprocessor._process_decoder_only_prompt]. + """ prompt_comps = await self._prompt_to_llm_inputs_async( prompt, tokenization_kwargs=tokenization_kwargs, @@ -863,7 +882,10 @@ async def preprocess_async( prompt_adapter_request: Optional[PromptAdapterRequest] = None, return_mm_hashes: bool = False, ) -> ProcessorInputs: - """Async version of {meth}`preprocess`.""" + """ + Async version of + [`preprocess`][vllm.inputs.preprocess.InputPreprocessor.preprocess]. + """ if self.model_config.is_encoder_decoder: assert not return_mm_hashes, ( "Multimodal hashes for encoder-decoder models should not be ", diff --git a/vllm/inputs/registry.py b/vllm/inputs/registry.py index 148b3558c15e..f424a8f613ab 100644 --- a/vllm/inputs/registry.py +++ b/vllm/inputs/registry.py @@ -38,7 +38,7 @@ def get_hf_config( ) -> _C: """ Get the HuggingFace configuration - ({class}`transformers.PretrainedConfig`) of the model, + (`transformers.PretrainedConfig`) of the model, additionally checking its type. Raises: @@ -79,7 +79,7 @@ def get_hf_processor( ) -> _P: """ Get the HuggingFace processor - ({class}`transformers.ProcessorMixin`) of the model, + (`transformers.ProcessorMixin`) of the model, additionally checking its type. Raises: diff --git a/vllm/logger.py b/vllm/logger.py index cf32041c5b70..fd16dd95bb1b 100644 --- a/vllm/logger.py +++ b/vllm/logger.py @@ -68,22 +68,22 @@ class _VllmLogger(Logger): """ Note: This class is just to provide type information. - We actually patch the methods directly on the {class}`logging.Logger` + We actually patch the methods directly on the [`logging.Logger`][] instance to avoid conflicting with other libraries such as `intel_extension_for_pytorch.utils._logger`. """ def info_once(self, msg: str, *args: Hashable) -> None: """ - As {meth}`info`, but subsequent calls with the same message - are silently dropped. 
+ As [`info`][logging.Logger.info], but subsequent calls with + the same message are silently dropped. """ _print_info_once(self, msg, *args) def warning_once(self, msg: str, *args: Hashable) -> None: """ - As {meth}`warning`, but subsequent calls with the same message - are silently dropped. + As [`warning`][logging.Logger.warning], but subsequent calls with + the same message are silently dropped. """ _print_warning_once(self, msg, *args) diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py index d6b910e4b75a..32375db0c8f1 100644 --- a/vllm/model_executor/layers/sampler.py +++ b/vllm/model_executor/layers/sampler.py @@ -228,17 +228,19 @@ def forward( ) -> Optional[SamplerOutput]: """ Single-step scheduling: - * Perform GPU-side sampling computation & compute - GPU-side logprobs tensor - * Pythonize sampling result & logprobs tensor + * Perform GPU-side sampling computation & compute + GPU-side logprobs tensor + * Pythonize sampling result & logprobs tensor Multi-step scheduling: - * Perform GPU-side sampling computation & compute - GPU-side logprobs tensor - * Defer Pythonization of sampling result & logprobs - tensor - * Encapsulate arguments required for deferred Pythonization - in the {class}`SamplerOutput` structure + * Perform GPU-side sampling computation & compute + GPU-side logprobs tensor + * Defer Pythonization of sampling result & logprobs + tensor + * Encapsulate arguments required for deferred Pythonization + in the + [`SamplerOutput`][vllm.model_executor.layers.sampler.SamplerOutput] + structure Args: logits: (num_tokens, vocab_size). diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py index 8f33a3e29c60..8be8841c1f6c 100644 --- a/vllm/model_executor/models/interfaces.py +++ b/vllm/model_executor/models/interfaces.py @@ -226,9 +226,11 @@ def forward( intermediate_tensors: Optional["IntermediateTensors"], ) -> Union[Tensor, "IntermediateTensors"]: """ - Accept {class}`IntermediateTensors` when PP rank > 0. + Accept [`IntermediateTensors`][vllm.sequence.IntermediateTensors] when + PP rank > 0. - Return {class}`IntermediateTensors` only for the last PP rank. + Return [`IntermediateTensors`][vllm.sequence.IntermediateTensors] only + for the last PP rank. """ ... diff --git a/vllm/model_executor/models/molmo.py b/vllm/model_executor/models/molmo.py index e215582a37ac..640a2049a629 100644 --- a/vllm/model_executor/models/molmo.py +++ b/vllm/model_executor/models/molmo.py @@ -965,7 +965,7 @@ def select_tiling( class MolmoProcessorWrapper: """ - Wraps {class}`MolmoProcessor` so that it can be called directly. + Wraps `MolmoProcessor` so that it can be called directly. The original definition can be found here: https://huggingface.co/allenai/Molmo-7B-D-0924/blob/main/preprocessing_molmo.py diff --git a/vllm/model_executor/models/pixtral.py b/vllm/model_executor/models/pixtral.py index c664d2371e27..b269f712d344 100644 --- a/vllm/model_executor/models/pixtral.py +++ b/vllm/model_executor/models/pixtral.py @@ -65,14 +65,14 @@ class PixtralImagePixelInputs(TypedDict): """ Shape: `(batch_size * num_images, num_channels, image_width, image_height)` - The result of stacking {attr}`ImageEncoding.tokens` from each prompt. + The result of stacking `ImageEncoding.tokens` from each prompt. """ class PixtralProcessorAdapter: """ Provide a HF-compatible interface for - {class}`mistral_common.tokens.tokenizers.multimodal.ImageEncoder`. + `mistral_common.tokens.tokenizers.multimodal.ImageEncoder`. 
""" def __init__(self, tokenizer: MistralTokenizer) -> None: diff --git a/vllm/model_executor/models/qwen_vl.py b/vllm/model_executor/models/qwen_vl.py index 57a66b793711..f5d242fdf1c2 100644 --- a/vllm/model_executor/models/qwen_vl.py +++ b/vllm/model_executor/models/qwen_vl.py @@ -382,7 +382,8 @@ def _get_tokenizer_without_image_pad( tokenizer: PreTrainedTokenizer) -> PreTrainedTokenizer: """ The logic of adding image pad tokens should only be applied in - {class}`QwenVLProcessor`, so they are patched out here. + [`QwenVLProcessor`][vllm.model_executor.models.qwen_vl.QwenVLProcessor], + so they are patched out here. The definition of the wrapped tokenizer can be found here: https://huggingface.co/Qwen/Qwen-VL/blob/main/tokenization_qwen.py diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index 61115afa76d4..8c9cf0db6d5d 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -382,7 +382,7 @@ def register_model( `model_cls` can be either: - - A {class}`torch.nn.Module` class directly referencing the model. + - A [`torch.nn.Module`][] class directly referencing the model. - A string in the format `:` which can be used to lazily import the model. This is useful to avoid initializing CUDA when importing the model and thus the related error diff --git a/vllm/model_executor/models/utils.py b/vllm/model_executor/models/utils.py index 027cd748e9de..3d821d3dc6b5 100644 --- a/vllm/model_executor/models/utils.py +++ b/vllm/model_executor/models/utils.py @@ -66,7 +66,7 @@ def apply( class AutoWeightsLoader: """ - Helper class to load weights into a {class}`torch.nn.Module`. It is able + Helper class to load weights into a [`torch.nn.Module`][]. It is able to automatically detect child modules and parameters while iterating over the weights only once. diff --git a/vllm/multimodal/__init__.py b/vllm/multimodal/__init__.py index 22fee2f74712..815e34d5ac5d 100644 --- a/vllm/multimodal/__init__.py +++ b/vllm/multimodal/__init__.py @@ -8,11 +8,12 @@ MULTIMODAL_REGISTRY = MultiModalRegistry() """ -The global {class}`~MultiModalRegistry` is used by model runners to -dispatch data processing according to the target model. +The global [`MultiModalRegistry`][vllm.multimodal.registry.MultiModalRegistry] +is used by model runners to dispatch data processing according to the target +model. Info: - [mm-processing][] + [mm_processing](../../../design/mm_processing.html) """ __all__ = [ diff --git a/vllm/multimodal/inputs.py b/vllm/multimodal/inputs.py index 71ef1a98e0d0..162dd52e3e73 100644 --- a/vllm/multimodal/inputs.py +++ b/vllm/multimodal/inputs.py @@ -29,14 +29,14 @@ HfImageItem: TypeAlias = Union["Image", np.ndarray, "torch.Tensor"] """ -A {class}`transformers.image_utils.ImageInput` representing a single image +A `transformers.image_utils.ImageInput` representing a single image item, which can be passed to a HuggingFace `ImageProcessor`. """ HfVideoItem: TypeAlias = Union[list["Image"], np.ndarray, "torch.Tensor", list[np.ndarray], list["torch.Tensor"]] """ -A {class}`transformers.image_utils.VideoInput` representing a single video +A `transformers.image_utils.VideoInput` representing a single video item, which can be passed to a HuggingFace `VideoProcessor`. 
""" @@ -48,7 +48,7 @@ ImageItem: TypeAlias = Union[HfImageItem, "torch.Tensor"] """ -A {class}`transformers.image_utils.ImageInput` representing a single image +A `transformers.image_utils.ImageInput` representing a single image item, which can be passed to a HuggingFace `ImageProcessor`. Alternatively, a 3-D tensor or batch of 2-D tensors, @@ -58,7 +58,7 @@ VideoItem: TypeAlias = Union[HfVideoItem, "torch.Tensor"] """ -A {class}`transformers.image_utils.VideoInput` representing a single video +A `transformers.image_utils.VideoInput` representing a single video item, which can be passed to a HuggingFace `VideoProcessor`. Alternatively, a 3-D tensor or batch of 2-D tensors, @@ -108,7 +108,8 @@ class MultiModalDataBuiltins(TypedDict, total=False): """ A dictionary containing an entry for each modality type to input. -The built-in modalities are defined by {class}`MultiModalDataBuiltins`. +The built-in modalities are defined by +[`MultiModalDataBuiltins`][vllm.multimodal.inputs.MultiModalDataBuiltins]. """ @@ -169,7 +170,8 @@ def __eq__(self, other: object) -> bool: def nested_tensors_equal(a: NestedTensors, b: NestedTensors) -> bool: - """Equality check between {data}`NestedTensors` objects.""" + """Equality check between + [`NestedTensors`][vllm.multimodal.inputs.NestedTensors] objects.""" if isinstance(a, torch.Tensor): return isinstance(b, torch.Tensor) and torch.equal(a, b) elif isinstance(b, torch.Tensor): @@ -189,7 +191,7 @@ def nested_tensors_equal(a: NestedTensors, b: NestedTensors) -> bool: BatchedTensorInputs: TypeAlias = Mapping[str, NestedTensors] """ A dictionary containing nested tensors which have been batched via -{meth}`MultiModalKwargs.batch`. +[`MultiModalKwargs.batch`][vllm.multimodal.inputs.MultiModalKwargs.batch]. """ @@ -197,7 +199,7 @@ def nested_tensors_equal(a: NestedTensors, b: NestedTensors) -> bool: class MultiModalFieldElem: """ Represents a keyword argument corresponding to a multi-modal item - in {class}`MultiModalKwargs`. + in [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs]. """ modality: str @@ -208,13 +210,15 @@ class MultiModalFieldElem: key: str """ - The key of this field in {class}`MultiModalKwargs`, + The key of this field in + [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs], i.e. the name of the keyword argument to be passed to the model. """ data: NestedTensors """ - The tensor data of this field in {class}`MultiModalKwargs`, + The tensor data of this field in + [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs], i.e. the value of the keyword argument to be passed to the model. """ @@ -237,7 +241,8 @@ def __eq__(self, other: object) -> bool: class BaseMultiModalField(ABC): """ Defines how to interpret tensor data belonging to a keyword argument in - {class}`MultiModalKwargs` for multiple multi-modal items, and vice versa. + [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs] for multiple + multi-modal items, and vice versa. """ def _field_factory(self, *, modality: str, key: str): @@ -262,10 +267,12 @@ def build_elems( data: NestedTensors, ) -> Sequence[MultiModalFieldElem]: """ - Construct {class}`MultiModalFieldElem` instances to represent - the provided data. + Construct + [`MultiModalFieldElem`][vllm.multimodal.inputs.MultiModalFieldElem] + instances to represent the provided data. - This is the inverse of {meth}`reduce_data`. + This is the inverse of + [`reduce_data`][vllm.multimodal.inputs.BaseMultiModalField.reduce_data]. 
""" raise NotImplementedError @@ -275,9 +282,11 @@ def _reduce_data(self, batch: list[NestedTensors]) -> NestedTensors: def reduce_data(self, elems: list[MultiModalFieldElem]) -> NestedTensors: """ - Merge the data from multiple instances of {class}`MultiModalFieldElem`. + Merge the data from multiple instances of + [`MultiModalFieldElem`][vllm.multimodal.inputs.MultiModalFieldElem]. - This is the inverse of {meth}`build_elems`. + This is the inverse of + [`build_elems`][vllm.multimodal.inputs.BaseMultiModalField.build_elems]. """ field_types = [type(item.field) for item in elems] if len(set(field_types)) > 1: @@ -290,7 +299,7 @@ def reduce_data(self, elems: list[MultiModalFieldElem]) -> NestedTensors: class MultiModalBatchedField(BaseMultiModalField): """ Info: - [MultiModalFieldConfig.batched][] + [`MultiModalFieldConfig.batched`][vllm.multimodal.inputs.MultiModalFieldConfig.batched] """ def build_elems( @@ -320,8 +329,8 @@ def _reduce_data(self, batch: list[NestedTensors]) -> NestedTensors: class MultiModalFlatField(BaseMultiModalField): """ Info: - [MultiModalFieldConfig.flat][] - [MultiModalFieldConfig.flat_from_sizes][] + [`MultiModalFieldConfig.flat`][vllm.multimodal.inputs.MultiModalFieldConfig.flat] + [`MultiModalFieldConfig.flat_from_sizes`][vllm.multimodal.inputs.MultiModalFieldConfig.flat_from_sizes] """ slices: Union[Sequence[slice], Sequence[Sequence[slice]]] dim: int = 0 @@ -362,7 +371,7 @@ def _expect_same_shape(tensor: torch.Tensor): class MultiModalSharedField(BaseMultiModalField): """ Info: - [MultiModalFieldConfig.shared][] + [`MultiModalFieldConfig.shared`][vllm.multimodal.inputs.MultiModalFieldConfig.shared] """ batch_size: int @@ -508,7 +517,7 @@ def flat_from_sizes(modality: str, ``` Info: - [MultiModalFieldConfig.flat][] + [`MultiModalFieldConfig.flat`][vllm.multimodal.inputs.MultiModalFieldConfig.flat] """ if size_per_item.ndim != 1: @@ -572,8 +581,10 @@ def build_elems( class MultiModalKwargsItem(UserDict[str, MultiModalFieldElem]): """ - A collection of {class}`MultiModalFieldElem` - corresponding to a data item in {class}`MultiModalDataItems`. + A collection of + [`MultiModalFieldElem`][vllm.multimodal.inputs.MultiModalFieldElem] + corresponding to a data item in + [`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems]. """ @staticmethod @@ -592,11 +603,13 @@ def modality(self) -> str: class MultiModalKwargs(UserDict[str, NestedTensors]): """ A dictionary that represents the keyword arguments to - {meth}`~torch.nn.Module.forward`. + [`torch.nn.Module.forward`][]. The metadata `items` enables us to obtain the keyword arguments - corresponding to each data item in {class}`MultiModalDataItems`, via - {meth}`get_item` and {meth}`get_items`. + corresponding to each data item in + [`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems], via + [`get_item`][vllm.multimodal.inputs.MultiModalKwargs.get_item] and + [`get_items`][vllm.multimodal.inputs.MultiModalKwargs.get_items]. 
""" @staticmethod @@ -635,7 +648,9 @@ def from_hf_inputs( @staticmethod def from_items(items: Sequence[MultiModalKwargsItem]): - """Construct a new {class}`MultiModalKwargs` from multiple items.""" + """Construct a new + [`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs] + from multiple items.""" elems_by_key = defaultdict[str, list[MultiModalFieldElem]](list) for item in items: for key, elem in item.items(): @@ -800,7 +815,7 @@ def get_items(self, modality: str) -> Sequence[MultiModalKwargsItem]: class MultiModalInputs(TypedDict): """ Represents the outputs of - {class}`vllm.multimodal.processing.BaseMultiModalProcessor`, + [`BaseMultiModalProcessor`][vllm.multimodal.processing.BaseMultiModalProcessor], ready to be passed to vLLM internals. """ @@ -836,7 +851,8 @@ class MultiModalInputs(TypedDict): class MultiModalEncDecInputs(MultiModalInputs): """ - Represents the outputs of {class}`vllm.multimodal.EncDecMultiModalProcessor` + Represents the outputs of + [`EncDecMultiModalProcessor`][vllm.multimodal.processing.EncDecMultiModalProcessor] ready to be passed to vLLM internals. """ diff --git a/vllm/multimodal/parse.py b/vllm/multimodal/parse.py index 6e9ec9555802..63af842747a5 100644 --- a/vllm/multimodal/parse.py +++ b/vllm/multimodal/parse.py @@ -28,7 +28,8 @@ class ModalityDataItems(ABC, Generic[_T, _I]): """ - Represents data items for a modality in {class}`MultiModalDataItems`. + Represents data items for a modality in + [`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems]. """ def __init__(self, data: _T, modality: str) -> None: @@ -251,15 +252,15 @@ def __init__(self, data: Union[torch.Tensor, list[torch.Tensor]]) -> None: class MultiModalDataItems(UserDict[str, ModalityDataItems[Any, Any]]): """ - As {data}`~vllm.multimodal.inputs.MultiModalDataDict`, but normalized - such that each entry corresponds to a list. + As [`MultiModalDataDict`][vllm.multimodal.inputs.MultiModalDataDict], but + normalized such that each entry corresponds to a list. """ def get_count(self, modality: str, *, strict: bool = True) -> int: """ Get the number of data items belonging to a modality. - If `strict=False`, return `0` instead of raising {exc}`KeyError` + If `strict=False`, return `0` instead of raising [`KeyError`][] even if the modality is not found. """ if modality not in self: @@ -305,8 +306,8 @@ def get_items( class MultiModalDataParser: """ - Parses {data}`~vllm.multimodal.inputs.MultiModalDataDict` into - {class}`MultiModalDataItems`. + Parses [`MultiModalDataDict`][vllm.multimodal.inputs.MultiModalDataDict] + into [`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems]. Args: target_sr (float, optional): Enables automatic resampling of audio diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py index f7a3c327982d..aa7914e40cbf 100644 --- a/vllm/multimodal/processing.py +++ b/vllm/multimodal/processing.py @@ -114,13 +114,14 @@ class PromptUpdateDetails(Generic[_S]): is_embed: Optional[Callable[["_BoundPromptSequence"], torch.Tensor]] = None """ - Given {attr}`full`, return a boolean mask of shape `(len(full),)` - indicating which positions of `full` to assign embeddings to. + Given [`full`][vllm.multimodal.processing.PromptUpdateDetails.full], + return a boolean mask of shape `(len(full),)` indicating which positions + of `full` to assign embeddings to. `None` (default) means to assign embeddings to all positions of `full`. The embeddings are obtained by calling - {class}`SupportsMultiModal.get_multimodal_embeddings`. 
+ [`SupportsMultiModal.get_multimodal_embeddings`][vllm.model_executor.models.interfaces.SupportsMultiModal.get_multimodal_embeddings]. """ @staticmethod @@ -159,13 +160,15 @@ def select_token_id( The token sequence or text that are part of the update. If only part of the content corresponds to feature placeholders, you can -use {class}`PromptUpdateDetails` to specify which part. +use [`PromptUpdateDetails`][vllm.multimodal.processing.PromptUpdateDetails] to +specify which part. """ PromptUpdateContent = Union[Callable[[int], PromptUpdateInfo], PromptUpdateInfo] """ -Given the index of the processed item within {attr}`modality`, +Given the index of the processed item within +[`modality`][vllm.multimodal.processing.PromptUpdate.modality], output the corresponding token sequence (or text). For convenience, you can directly pass in the token sequence (or text) @@ -260,8 +263,10 @@ class PromptInsertion(PromptUpdate): insertion: PromptUpdateContent = field(repr=False) """ - Given the index of the processed item within {attr}`modality`, - output the token sequence (or text) to insert right after {attr}`target`. + Given the index of the processed item within + [`modality`][vllm.multimodal.processing.PromptUpdate.modality], + output the token sequence (or text) to insert right after + [`target`][vllm.multimodal.processing.PromptUpdate.target]. For convenience, you can directly pass in the token sequence (or text) instead of a function if it does not depend on the input. @@ -332,8 +337,10 @@ class PromptReplacement(PromptUpdate): replacement: PromptUpdateContent = field(repr=False) """ - Given the index of the processed item within {attr}`modality`, - output the token sequence (or text) to replace {attr}`target`. + Given the index of the processed item within + [`modality`][vllm.multimodal.processing.PromptUpdate.modality], + output the token sequence (or text) to replace + [`target`][vllm.multimodal.processing.PromptUpdate.target]. For convenience, you can directly pass in the token sequence (or text) instead of a function if it does not depend on the input. @@ -387,14 +394,16 @@ def modality(self) -> str: def full_groupby_modality(values: Iterable[_M]) -> ItemsView[str, list[_M]]: - """Convenience function to apply [full_groupby][] based on modality.""" + """Convenience function to apply [`full_groupby`][vllm.utils.full_groupby] + based on modality.""" return full_groupby(values, key=lambda x: x.modality) @dataclass class _BoundPromptSequence: """ - A {data}`_PromptSeq` bound to a tokenizer to automatically + A [`_PromptSeq`][vllm.multimodal.processing.PromptSeq] bound + to a tokenizer to automatically convert between token sequence and text representations. """ tokenizer: AnyTokenizer = field(repr=False) @@ -446,9 +455,11 @@ class _BoundPromptContent: @dataclass class BoundPromptUpdate: """ - A {class}`PromptUpdate` bound to a tokenizer to automatically convert - {attr}`target` and the result of {meth}`get_content` between - token sequence and text representations. + A [`PromptUpdate`][vllm.multimodal.processing.PromptUpdate] bound + to a tokenizer to automatically convert + [`target`][vllm.multimodal.processing.PromptUpdate.target] and the result of + [`get_content`][vllm.multimodal.processing.BoundPromptUpdate.get_content] + between token sequence and text representations. 
""" _origin: PromptUpdate tokenizer: AnyTokenizer = field(repr=False) @@ -482,7 +493,8 @@ def mode(self) -> UpdateMode: def get_content(self, item_idx: int) -> _BoundPromptContent: """ - Given the index of the processed item within {attr}`modality`, + Given the index of the processed item within + [`modality`][vllm.multimodal.processing.PromptUpdate.modality], output the token sequence (or text) to update. """ content = self.content @@ -1019,7 +1031,8 @@ def put( ) -> None: """ Put a processed multi-modal item into the cache - according to its dependencies (see {meth}`get`). + according to its dependencies + (see [`get`][vllm.multimodal.processing.ProcessingCache.get]). """ cache_key = MultiModalHasher.hash_kwargs(model_id=model_id, **{modality: input_item}, @@ -1091,7 +1104,8 @@ def get_allowed_mm_limits(self) -> Mapping[str, int]: MultiModalHashes = dict[str, list[str]] """ -A collection of hashes with a similar structure as {class}`MultiModalKwargs`. +A collection of hashes with a similar structure as +[`MultiModalKwargs`][vllm.multimodal.inputs.MultiModalKwargs]. """ @@ -1099,7 +1113,7 @@ class BaseMultiModalProcessor(ABC, Generic[_I]): """ Abstract base class to process multi-modal inputs to be used in vLLM. - Not to be confused with {class}`transformers.ProcessorMixin`. + Not to be confused with `transformers.ProcessorMixin`. """ def __init__(self, @@ -1126,10 +1140,12 @@ def __call__( def _get_data_parser(self) -> MultiModalDataParser: """ Construct a parser to preprocess multi-modal data items - before passing them to {meth}`_get_hf_mm_data`. + before passing them to + [`_get_hf_mm_data`][vllm.multimodal.processing.BaseMultiModalProcessor._get_hf_mm_data]. You can support additional modalities by creating a subclass - of {class}`MultiModalDataParser` that has additional subparsers. + of [`MultiModalDataParser`][vllm.multimodal.parse.MultiModalDataParser] + that has additional subparsers. """ return MultiModalDataParser() @@ -1138,8 +1154,11 @@ def _to_mm_items( mm_data: MultiModalDataDict, ) -> MultiModalDataItems: """ - Normalize {class}`MultiModalDataDict` to {class}`MultiModalDataItems` - before passing them to {meth}`_get_hf_mm_data`. + Normalize + [`MultiModalDataDict`][vllm.multimodal.inputs.MultiModalDataDict] + to [`MultiModalDataItems`][vllm.multimodal.parse.MultiModalDataItems] + before passing them to + [`_get_hf_mm_data`][vllm.multimodal.processing.BaseMultiModalProcessor._get_hf_mm_data]. """ mm_items = self.data_parser.parse_mm_data(mm_data) supported_mm_limits = self.info.get_supported_mm_limits() @@ -1191,7 +1210,8 @@ def _get_prompt_updates( inputs. Moreover, this information is critical to determine the token positions - in order to construct {class}`~vllm-multimodal.input.PlaceholderRange` + in order to construct + [`PlaceholderRange`][vllm.multimodal.inputs.PlaceholderRange] for each multi-modal item. """ raise NotImplementedError @@ -1315,7 +1335,9 @@ def _apply_hf_processor_tokens_only( Most HF processors accept prompt text but not prompt tokens. If the HF processor adds or removes tokens that are not related to multi-modal data, you should override this method so it is consistent - with the output of {meth}`_apply_hf_processor_text_only` on the + with the output of + [`_apply_hf_processor_text_only`][vllm.multimodal.processing.BaseMultiModalProcessor._apply_hf_processor_text_only] + on the corresponding text. 
""" return prompt_tokens @@ -1330,7 +1352,8 @@ def _apply_hf_processor_mm_only( Since HF processor requires that text and multi-modal items correspond to each other, we generate dummy text using - {class}`DummyInputsBuilder` to go along with the multi-modal data. + [`DummyInputsBuilder`][vllm.multimodal.profiling.BaseDummyInputsBuilder] + to go along with the multi-modal data. """ mm_counts = mm_items.get_all_counts() diff --git a/vllm/multimodal/profiling.py b/vllm/multimodal/profiling.py index b5875124c126..76112a095c6d 100644 --- a/vllm/multimodal/profiling.py +++ b/vllm/multimodal/profiling.py @@ -25,7 +25,7 @@ class ProcessorInputs: """ Represents the keyword arguments to - {meth}`vllm.multimodal.processing.BaseMultiModalProcessor.apply`. + [`vllm.multimodal.processing.BaseMultiModalProcessor.apply`][]. """ prompt_text: str mm_data: MultiModalDataDict diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py index 0d0d4a4363f4..b9f5cee922a7 100644 --- a/vllm/multimodal/registry.py +++ b/vllm/multimodal/registry.py @@ -29,7 +29,11 @@ class ProcessingInfoFactory(Protocol[_I_co]): - """Constructs a {class}`MultiModalProcessor` instance from the context.""" + """ + Constructs a + [`BaseMultiModalProcessor`][vllm.multimodal.processing.BaseMultiModalProcessor] + instance from the context. + """ def __call__( self, @@ -40,7 +44,9 @@ def __call__( class DummyInputsBuilderFactory(Protocol[_I]): """ - Constructs a {class}`BaseDummyInputsBuilder` instance from the context. + Constructs a + [`BaseDummyInputsBuilder`][vllm.multimodal.profiling.BaseDummyInputsBuilder] + instance from the context. """ def __call__(self, info: _I) -> BaseDummyInputsBuilder[_I]: @@ -48,7 +54,11 @@ def __call__(self, info: _I) -> BaseDummyInputsBuilder[_I]: class MultiModalProcessorFactory(Protocol[_I]): - """Constructs a {class}`MultiModalProcessor` instance from the context.""" + """ + Constructs a + [`BaseMultiModalProcessor`][vllm.multimodal.processing.BaseMultiModalProcessor] + instance from the context. + """ def __call__( self, @@ -155,8 +165,6 @@ def get_max_tokens_by_modality( """ Get the maximum number of tokens from each modality for profiling the memory usage of a model. - - See {meth}`MultiModalPlugin.get_max_multimodal_tokens` for more details. """ mm_limits = self.get_mm_limits_per_prompt(model_config) @@ -170,8 +178,6 @@ def get_max_multimodal_tokens(self, model_config: "ModelConfig") -> int: """ Get the maximum number of multi-modal tokens for profiling the memory usage of a model. - - See {meth}`MultiModalPlugin.get_max_multimodal_tokens` for more details. """ return sum(self.get_max_tokens_by_modality(model_config).values()) @@ -213,9 +219,6 @@ def register_processor( When the model receives multi-modal data, the provided function is invoked to transform the data into a dictionary of model inputs. - - Info: - [mm-processing][] """ def wrapper(model_cls: N) -> N: @@ -258,9 +261,6 @@ def create_processor( ) -> BaseMultiModalProcessor[BaseProcessingInfo]: """ Create a multi-modal processor for a specific model and tokenizer. 
-
-        Info:
-            [mm-processing][]
         """
         if not model_config.is_multimodal_model:
             raise ValueError(f"{model_config.model} is not a multimodal model")
diff --git a/vllm/multimodal/utils.py b/vllm/multimodal/utils.py
index aef5f669ac68..9ddba67bff70 100644
--- a/vllm/multimodal/utils.py
+++ b/vllm/multimodal/utils.py
@@ -259,7 +259,8 @@ def fetch_image_embedding(
 
 
 global_media_connector = MediaConnector()
-"""The global {class}`MediaConnector` instance used by vLLM."""
+"""The global [`MediaConnector`][vllm.multimodal.utils.MediaConnector]
+instance used by vLLM."""
 
 fetch_audio = global_media_connector.fetch_audio
 fetch_image = global_media_connector.fetch_image
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 646faa944565..504c3b42a75d 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -84,7 +84,7 @@ def as_version_str(self) -> str:
 
     def to_int(self) -> int:
         """
-        Express device capability as an integer ``<major><minor>``.
+        Express device capability as an integer `<major><minor>`.
 
         It is assumed that the minor version is always a single digit.
         """
@@ -206,10 +206,11 @@ def has_device_capability(
         """
         Test whether this platform is compatible with a device capability.
 
-        The ``capability`` argument can either be:
+        The `capability` argument can either be:
 
-        - A tuple ``(major, minor)``.
-        - An integer ``<major><minor>``. (See {meth}`DeviceCapability.to_int`)
+        - A tuple `(major, minor)`.
+        - An integer `<major><minor>`. (See
+          [`DeviceCapability.to_int`][vllm.platforms.interface.DeviceCapability.to_int])
         """
         current_capability = cls.get_device_capability(device_id=device_id)
         if current_capability is None:
diff --git a/vllm/sequence.py b/vllm/sequence.py
index e9212a82506e..9b3f06b2faf0 100644
--- a/vllm/sequence.py
+++ b/vllm/sequence.py
@@ -27,7 +27,7 @@
 
 
 def array_full(token_id: int, count: int):
-    """{class}`array` equivalent of [numpy.full][]."""
+    """[`array`][] equivalent of [numpy.full][]."""
     return array(VLLM_TOKEN_ID_ARRAY_TYPE, [token_id]) * count
 
 
@@ -192,8 +192,8 @@ class SequenceData(msgspec.Struct,
     def from_prompt_token_counts(
            *token_counts: tuple[int, int]) -> "SequenceData":
        """
-        Construct a {class}`SequenceData` instance by concatenating
-        prompt token sequences.
+        Construct a [`SequenceData`][vllm.sequence.SequenceData] instance
+        by concatenating prompt token sequences.
 
         Each tuple represents one token sequence, expressed in the form
         `(token_id, count)`.
@@ -216,8 +216,8 @@ def from_seqs(
         prompt_embeds: Optional[torch.Tensor] = None,
     ) -> "SequenceData":
         """
-        Construct a {class}`SequenceData` instance from prompt and output
-        token sequences.
+        Construct a [`SequenceData`][vllm.sequence.SequenceData] instance
+        from prompt and output token sequences.
         """
         prompt_token_ids_arr = array(VLLM_TOKEN_ID_ARRAY_TYPE,
                                      prompt_token_ids)
@@ -452,9 +452,11 @@ def __repr__(self) -> str:
 class Sequence:
     """Stores the data, status, and block information of a sequence.
 
-    The sequence is constructed from the {data}`DecoderOnlyInputs`
-    (for decoder-only) or {data}`EncoderDecoderInputs` (for encoder-decoder)
-    instance passed in through the `inputs` constructor argument.
+    The sequence is constructed from the
+    [`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] (for decoder-only)
+    or [`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs]
+    (for encoder-decoder) instance passed in through the `inputs`
+    constructor argument.
 
     Args:
         seq_id: The ID of the sequence.
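The `vllm/platforms/interface.py` hunks above describe the packed-integer form of a device capability. A minimal, self-contained sketch of that convention follows; the `DeviceCapability` and `has_device_capability` names are re-declared locally purely for illustration, and the comparison logic is an assumption based on the docstring rather than the actual implementation in vLLM.

```python
from typing import NamedTuple, Union


class DeviceCapability(NamedTuple):
    """Local stand-in for the example; not the class from vllm.platforms."""
    major: int
    minor: int

    def to_int(self) -> int:
        # Packed form `<major><minor>`; only meaningful while the minor
        # version is a single digit, as the docstring assumes.
        assert 0 <= self.minor < 10
        return self.major * 10 + self.minor


def has_device_capability(
    current: DeviceCapability,
    capability: Union[tuple[int, int], int],
) -> bool:
    # Accept either a (major, minor) tuple or the packed integer form.
    required = (DeviceCapability(*capability).to_int()
                if isinstance(capability, tuple) else capability)
    return current.to_int() >= required


# Compute capability 8.9 satisfies a requirement of 8.0 in either spelling.
assert has_device_capability(DeviceCapability(8, 9), (8, 0))
assert has_device_capability(DeviceCapability(8, 9), 80)
```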
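Similarly, `SequenceData.from_prompt_token_counts` is documented purely in terms of `(token_id, count)` tuples. The sketch below shows the concatenation it describes using only the standard library; the `"l"` typecode for `VLLM_TOKEN_ID_ARRAY_TYPE` is an assumption made for the example, and the real classmethod returns a `SequenceData` rather than a bare `array`.

```python
from array import array

VLLM_TOKEN_ID_ARRAY_TYPE = "l"  # assumed typecode, for illustration only


def array_full(token_id: int, count: int):
    """`array` equivalent of `numpy.full`, mirroring the hunk above."""
    return array(VLLM_TOKEN_ID_ARRAY_TYPE, [token_id]) * count


# from_prompt_token_counts-style concatenation: each (token_id, count) pair
# expands to `count` repetitions of `token_id`, joined into one sequence.
token_counts = [(32000, 3), (1, 1)]
prompt_token_ids = array(VLLM_TOKEN_ID_ARRAY_TYPE)
for token_id, count in token_counts:
    prompt_token_ids += array_full(token_id, count)

assert list(prompt_token_ids) == [32000, 32000, 32000, 1]
```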
diff --git a/vllm/utils.py b/vllm/utils.py index 50296aada4cc..aa932b429d4b 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -1004,7 +1004,7 @@ def flatten_2d_lists(lists: Iterable[Iterable[T]]) -> list[T]: def full_groupby(values: Iterable[_V], *, key: Callable[[_V], _K]): """ - Unlike {class}`itertools.groupby`, groups are not broken by + Unlike [`itertools.groupby`][], groups are not broken by non-contiguous data. """ groups = defaultdict[_K, list[_V]](list) @@ -1924,7 +1924,8 @@ class _PlaceholderBase: Disallows downstream usage of placeholder modules. We need to explicitly override each dunder method because - {meth}`__getattr__` is not called when they are accessed. + [`__getattr__`][vllm.utils._PlaceholderBase.__getattr__] + is not called when they are accessed. Info: [Special method lookup](https://docs.python.org/3/reference/datamodel.html#special-lookup) diff --git a/vllm/v1/worker/utils.py b/vllm/v1/worker/utils.py index 28503a0a926d..91548a52cfc7 100644 --- a/vllm/v1/worker/utils.py +++ b/vllm/v1/worker/utils.py @@ -10,7 +10,7 @@ def sanity_check_mm_encoder_outputs( ) -> None: """ Perform sanity checks for the result of - {meth}`vllm.model_executor.models.SupportsMultiModal.get_multimodal_embeddings`. + [`vllm.model_executor.models.SupportsMultiModal.get_multimodal_embeddings`][]. """ assert isinstance(mm_embeddings, (list, tuple, torch.Tensor)), ( "Expected multimodal embeddings to be a list/tuple of 2D tensors, " @@ -39,7 +39,7 @@ def scatter_mm_placeholders( Scatter the multimodal embeddings into a contiguous tensor that represents the placeholder tokens. - {class}`vllm.multimodal.processing.PromptUpdateDetails.is_embed`. + [`vllm.multimodal.processing.PromptUpdateDetails.is_embed`][]. Args: embeds: The multimodal embeddings. diff --git a/vllm/worker/multi_step_model_runner.py b/vllm/worker/multi_step_model_runner.py index d9cf2055ed56..f8d5acf586c5 100644 --- a/vllm/worker/multi_step_model_runner.py +++ b/vllm/worker/multi_step_model_runner.py @@ -733,12 +733,13 @@ def _pythonize_sampler_output( logprobs_tensor: Optional[torch.Tensor], cache: Optional[PythonizationCache], ) -> None: - """ This function is only called when the output tensors are ready. - See {class}`ModelOutput`. - - Modifies `output.outputs` and `pinned_sampled_token_buffer` in-place, + """ This function is only called when the output tensors are ready. + See [`ModelOutput`][vllm.worker.multi_step_model_runner.ModelOutput]. + + Modifies `output.outputs` and `pinned_sampled_token_buffer` in-place, adding a Pythonized output data structure - ({class}`CompletionSequenceGroupOutput`) for each {class}`SequenceGroup`. + ([`CompletionSequenceGroupOutput`][vllm.sequence.CompletionSequenceGroupOutput]) + for each [`SequenceGroup`][vllm.sequence.SequenceGroup]. Args: model_input
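The `full_groupby` docstring above contrasts it with `itertools.groupby`. The short sketch below reuses the grouping loop visible in the hunk to show the difference on non-contiguous data; the helper itself lives in `vllm/utils.py`.

```python
from collections import defaultdict
from itertools import groupby


def first_letter(word: str) -> str:
    return word[0]


values = ["apple", "avocado", "banana", "apricot"]

# itertools.groupby only merges adjacent runs, so "a" appears twice here.
contiguous = [(k, list(g)) for k, g in groupby(values, key=first_letter)]
# [('a', ['apple', 'avocado']), ('b', ['banana']), ('a', ['apricot'])]

# full_groupby-style grouping: one group per key, regardless of ordering.
groups = defaultdict[str, list[str]](list)
for value in values:
    groups[first_letter(value)].append(value)
grouped = list(groups.items())
# [('a', ['apple', 'avocado', 'apricot']), ('b', ['banana'])]

print(contiguous)
print(grouped)
```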
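The `scatter_mm_placeholders` docstring is only partially visible in this hunk. As a rough sketch of the operation it names, here is one way to scatter embeddings into the positions selected by a boolean `is_embed` mask; the NaN filler and the handling of edge cases (such as `is_embed=None`) are assumptions for the example, not the behavior of the helper in `vllm/v1/worker/utils.py`.

```python
import torch


def scatter_placeholders_sketch(
    embeds: torch.Tensor,      # [num_embeds, embed_dim]
    is_embed: torch.Tensor,    # [num_placeholder_tokens], dtype=torch.bool
) -> torch.Tensor:
    """Place each row of `embeds` at the True positions of `is_embed`."""
    out = embeds.new_full((is_embed.shape[0], embeds.shape[-1]), float("nan"))
    out[is_embed] = embeds  # rows of `embeds` fill the masked positions in order
    return out


embeds = torch.ones(2, 4)                     # two 4-dim embeddings
is_embed = torch.tensor([True, False, True])  # three placeholder tokens
scattered = scatter_placeholders_sketch(embeds, is_embed)
assert scattered.shape == (3, 4)
assert scattered[1].isnan().all()
```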