From 138dcaff34c27f492898d301e8c3836f6f1a229a Mon Sep 17 00:00:00 2001 From: Punit Vara Date: Wed, 17 Sep 2025 13:34:11 +0530 Subject: [PATCH 1/2] [Doc] Fix cross-reference warnings - Remove or correct broken cross-references in docstrings to resolve mkdocs_autorefs warnings. - Wrap bracketed data structure notations in backticks to prevent false cross-references. related to issue: #25020 Signed-off-by: Punit Vara --- vllm/benchmarks/datasets.py | 3 ++- .../device_communicators/shm_object_storage.py | 8 ++++---- .../model_executor/layers/mamba/ops/causal_conv1d.py | 12 +++++++----- vllm/model_executor/models/mistral3.py | 2 +- vllm/multimodal/profiling.py | 2 +- vllm/v1/core/kv_cache_manager.py | 5 +++-- 6 files changed, 18 insertions(+), 14 deletions(-) diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index 1831539a6adb..1cab40802c39 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -171,7 +171,8 @@ def get_random_lora_request( If `None`, LoRA is not used. Returns: - A new [LoRARequest][] (or `None` if not applicable). + A new [`LoRARequest`][vllm.lora.request.LoRARequest] + (or `None` if not applicable). 
""" if max_loras is None or lora_path is None: return None diff --git a/vllm/distributed/device_communicators/shm_object_storage.py b/vllm/distributed/device_communicators/shm_object_storage.py index 3fac104bda1e..352e7525d4c8 100644 --- a/vllm/distributed/device_communicators/shm_object_storage.py +++ b/vllm/distributed/device_communicators/shm_object_storage.py @@ -30,7 +30,7 @@ class SingleWriterShmRingBuffer: - Maintains metadata for each allocated buffer chunk in the writer process - Supports custom "is_free_fn" functions to determine when buffers can be reused - - Each buffer chunk contains: [4-byte id][4-byte size][actual_data] + - Each buffer chunk contains: `[4-byte id][4-byte size][actual_data]` Key Concepts: - monotonic_id_start/end: Track the range of active buffer IDs @@ -99,7 +99,7 @@ class SingleWriterShmRingBuffer: - Writer handles garbage collection (free_buf) based on reader feedback Memory Layout per Buffer Chunk: - [4-byte monotonic_id][4-byte chunk_size][actual_data...] + `[4-byte monotonic_id][4-byte chunk_size][actual_data...]` ^metadata_start ^data_start The monotonic_id ensures data integrity - readers can verify they're @@ -185,7 +185,7 @@ def allocate_buf(self, size: int) -> tuple[int, int]: ''' Allocate a buffer `MD_SIZE` + `size` bytes in the shared memory. Memory layout: - [4-byte monotonic_id][4-byte size][buffer data...] + `[4-byte monotonic_id][4-byte size][buffer data...]` ''' assert self.is_writer, "Only the writer can allocate buffers." 
assert size > 0, "Size must be greater than 0" @@ -413,7 +413,7 @@ class SingleWriterShmObjectStorage: allocation Memory Layout per Object: - [4-byte reference_count][metadata_size][serialized_object_data] + `[4-byte reference_count][metadata_size][serialized_object_data]` Thread Safety: - Writer operations (put, clear) are single-threaded by design diff --git a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py index 7e3ea561fd29..c7af1d7c2ac3 100644 --- a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py +++ b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py @@ -927,11 +927,13 @@ def causal_conv1d_update( validate_data=False, ): """ - x: (batch, dim) or (batch, dim, seqlen) or (num_tokens, dim) - [shape=2: single token prediction] - [shape=3: single or multiple tokens prediction] - [shape=2 with num_tokens: continuous batching, where num_tokens is the - total tokens of all sequences in that batch] + x: Input tensor which can take the following shapes: + + - `[batch, dim]` - single token prediction + - `[batch, dim, seqlen]` - single or multiple tokens prediction + - `[num_tokens, dim]` - continuous batching, where num_tokens is + the total tokens of all sequences in that batch + conv_state: (..., dim, state_len), where state_len >= width - 1 weight: (dim, width) bias: (dim,) diff --git a/vllm/model_executor/models/mistral3.py b/vllm/model_executor/models/mistral3.py index 09479012a03a..d15776a39362 100644 --- a/vllm/model_executor/models/mistral3.py +++ b/vllm/model_executor/models/mistral3.py @@ -583,7 +583,7 @@ def forward( inputs_embeds: Optional tensor of input embeddings. 
Info: - [Mistral3ImagePixelInputs][] + [`Mistral3ImagePixelInputs`][vllm.model_executor.models.mistral3.Mistral3ImagePixelInputs] """ if intermediate_tensors is not None: inputs_embeds = None diff --git a/vllm/multimodal/profiling.py b/vllm/multimodal/profiling.py index bad6c0c3d9db..fbbc55d3524c 100644 --- a/vllm/multimodal/profiling.py +++ b/vllm/multimodal/profiling.py @@ -301,7 +301,7 @@ def get_mm_max_contiguous_tokens( Returns the maximum length of the multimodal (image placeholders+text) tokens, including any break/text tokens in-between image embeddings. - [IMG] [IMG] [IMG] [IMG] [IMG] [IMG] + ` [IMG] [IMG] [IMG] [IMG] [IMG] [IMG] ` Returns 9, even when the number of image embeddings is 6. This is important to take into account when profiling and diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py index 3a0fbb5e5c41..401327f727a4 100644 --- a/vllm/v1/core/kv_cache_manager.py +++ b/vllm/v1/core/kv_cache_manager.py @@ -24,8 +24,9 @@ class KVCacheBlocks: """ blocks: tuple[list[KVCacheBlock], ...] """ - blocks[i][j] refers to the i-th kv_cache_group and the j-th block of tokens. - We don't use block of tokens as the outer dimension because it assumes all + `blocks[i][j]` refers to the i-th kv_cache_group + and the j-th block of tokens. We don't use block of + tokens as the outer dimension because it assumes all kv_cache_groups have the same number of blocks, which is true for now but will be broken if we want to give different block_size to different kv_cache_groups in the future. 
From ef094ddccf71e87f60798ad4ff1967850512d58b Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 18 Sep 2025 09:34:00 +0100 Subject: [PATCH 2/2] Update vllm/model_executor/layers/mamba/ops/causal_conv1d.py Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/model_executor/layers/mamba/ops/causal_conv1d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py index c7af1d7c2ac3..2a88fa661da0 100644 --- a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py +++ b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py @@ -932,7 +932,7 @@ def causal_conv1d_update( - `[batch, dim]` - single token prediction - `[batch, dim, seqlen]` - single or multiple tokens prediction - `[num_tokens, dim]` - continuous batching, where num_tokens is - the total tokens of all sequences in that batch + the total tokens of all sequences in that batch conv_state: (..., dim, state_len), where state_len >= width - 1 weight: (dim, width)