Merged
2 changes: 1 addition & 1 deletion vllm/v1/structured_output/__init__.py
@@ -27,7 +27,6 @@
class StructuredOutputManager:

def __init__(self, vllm_config: VllmConfig):
- self.vocab_size = vllm_config.model_config.get_vocab_size()
self.vllm_config = vllm_config
self.init_complete = False

@@ -41,6 +40,7 @@ def _delayed_init(self):
tokenizer_group.ping()

tokenizer = tokenizer_group.get_lora_tokenizer(None)
Member commented:

Not directly related to this PR, but it looks like structured output currently isn't compatible with custom-vocab LoRAs.

Collaborator (author) replied:

Sounds good, will add to the list for compatibility.

FWIW, I don't think it ever worked with LoRA.

+ self.vocab_size = tokenizer.max_token_id
if isinstance(tokenizer, MistralTokenizer):
# NOTE: ideally, xgrammar should handle this accordingly.
# refer to https://github.com/mlc-ai/xgrammar/blob/d77c0a0173ef14779c918e3be7966ba852f7910f/python/xgrammar/tokenizer_info.py#L98
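
The change above swaps the vocab size used by the structured output manager from the model config to the tokenizer itself. A minimal sketch of why those two numbers can diverge, using a hypothetical `ToyTokenizer` and made-up sizes (this is not vLLM's actual code):

class ToyTokenizer:
    """Hypothetical stand-in for a tokenizer whose highest token ID is known."""
    max_token_id = 31_999  # token IDs run 0..31_999

# Model configs may report a padded vocab size (e.g. rounded up for
# tensor-parallel sharding); the padded value here is assumed for illustration.
model_config_vocab_size = 32_768

tokenizer = ToyTokenizer()

# Sizing the structured-output state from the tokenizer, as the diff does,
# keeps the grammar backend's token view aligned with IDs the tokenizer
# can actually emit.
vocab_size = tokenizer.max_token_id
print(f"config: {model_config_vocab_size}, tokenizer: {vocab_size}")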
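
On the custom-vocab LoRA point raised in the thread: the manager takes its vocab size from `get_lora_tokenizer(None)`, i.e. the base tokenizer. A hypothetical sketch of the resulting mismatch (class names and sizes are made up, not vLLM code):

class BaseTokenizer:
    max_token_id = 31_999


class CustomVocabLoraTokenizer(BaseTokenizer):
    # A LoRA that adds vocabulary appends token IDs past the base range.
    max_token_id = 32_255


# vocab_size is fixed once from the base tokenizer...
vocab_size = BaseTokenizer.max_token_id

# ...so any LoRA-added token ID falls outside the range the structured
# output machinery knows about, and cannot be masked or matched.
assert CustomVocabLoraTokenizer.max_token_id > vocab_size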