 from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
+from vllm.utils import LazyLoader
 from vllm.v1.structured_output.backend_guidance import GuidanceBackend
 from vllm.v1.structured_output.backend_types import (StructuredOutputBackend,
                                                      StructuredOutputGrammar)
...
     import numpy as np
     import numpy.typing as npt
     import torch
+    import xgrammar.testing as xgr_testing

     from vllm.v1.request import Request
+else:
+    xgr_testing = LazyLoader('xgr_testing', globals(), 'xgrammar.testing')

 logger = init_logger(__name__)

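The `if TYPE_CHECKING:` import paired with the `else:` branch keeps `xgrammar` off the module's import path: type checkers resolve a real `import xgrammar.testing`, while at runtime `vllm.utils.LazyLoader` defers the import until the first attribute access on `xgr_testing`. A minimal sketch of the pattern in isolation (same names as in the diff; the comments about import timing are assumptions about LazyLoader's deferred-import behaviour, not taken from this change):

    from typing import TYPE_CHECKING

    from vllm.utils import LazyLoader

    if TYPE_CHECKING:
        # Seen only by static type checkers; never executed at runtime.
        import xgrammar.testing as xgr_testing
    else:
        # Module proxy: 'import xgrammar.testing' runs on first attribute access.
        xgr_testing = LazyLoader('xgr_testing', globals(), 'xgrammar.testing')

    # Importing this module therefore stays cheap when xgrammar is unused; the
    # first call such as xgr_testing._is_single_token_bitmask(...) pays the cost.
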
@@ -122,3 +126,29 @@ def grammar_bitmask(
         # np.ndarray, because that is much more efficient for serialization
         # and deserialization when sending this to the GPU workers.
         return bitmask_tensor.numpy()
+
+    def jump_forward_tokens(self, request, batch_index) -> list[int]:
+        """
+        For xgrammar-based structured output requests, repeatedly check
+        whether the grammar bitmask allows exactly one token; if it does,
+        advance the FSM, collect that token, and check again.
+        Returns the list of jump-forward token IDs collected this way.
+        """
+        so_request = request.structured_output_request
+        if so_request is None or so_request.grammar is None:
+            return []
+
+        jump_tokens: list[int] = []
+        # Single-row scratch bitmask; every fill and check below uses row 0.
+        bitmask = so_request.grammar.allocate_token_bitmask(1)
+        so_request.grammar.fill_bitmask(bitmask, 0)
+        is_single, unique_token_id = xgr_testing._is_single_token_bitmask(
+            bitmask, so_request.grammar.vocab_size, 0)
+        while is_single and unique_token_id != -1:
+            jump_tokens.append(unique_token_id)
+            so_request.grammar.accept_tokens(request.request_id,
+                                             [unique_token_id])
+            so_request.grammar.fill_bitmask(bitmask, 0)
+            is_single, unique_token_id = xgr_testing._is_single_token_bitmask(
+                bitmask, so_request.grammar.vocab_size, 0)
+        return jump_tokens
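
The point of collecting these forced tokens is that the caller can append them to a request's output without spending a model forward pass on each one. The scheduler-side integration is not part of this hunk, so the following is only a hypothetical sketch of a call site; `manager`, `request`, and the idea of recording the tokens afterwards are assumptions, not code from this PR:

    # Hypothetical call site -- not the scheduler change from this PR.
    def take_jump_forward_tokens(manager, request, batch_index: int) -> list[int]:
        """Ask the structured-output manager which tokens the grammar forces
        next; the caller can emit them without running the model for them."""
        forced = manager.jump_forward_tokens(request, batch_index)
        # jump_forward_tokens() has already advanced the grammar FSM past
        # `forced`, so the caller only needs to record these token IDs
        # (e.g. append them to the request's output) before the next step.
        return forced

A request whose next tokens are fully determined by the grammar (for example, fixed JSON punctuation or a required key) would then skip one decode step per forced token.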