From 39dbd7b53d3daca0708438e4e518c51ea6a4e405 Mon Sep 17 00:00:00 2001 From: Dan Lord Date: Wed, 6 Sep 2023 12:38:09 -0700 Subject: [PATCH 1/2] Add option to ModelConfig to keep special tokens in the output instead of always skipping them. --- vllm/config.py | 2 ++ vllm/engine/arg_utils.py | 10 +++++++--- vllm/engine/llm_engine.py | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 2e8d58411181c..f5462dcab9866 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -39,6 +39,7 @@ def __init__( tokenizer: str, tokenizer_mode: str, trust_remote_code: bool, + keep_special_tokens: bool, download_dir: Optional[str], use_np_weights: bool, use_dummy_weights: bool, @@ -49,6 +50,7 @@ def __init__( self.tokenizer = tokenizer self.tokenizer_mode = tokenizer_mode self.trust_remote_code = trust_remote_code + self.keep_special_tokens = keep_special_tokens self.download_dir = download_dir self.use_np_weights = use_np_weights self.use_dummy_weights = use_dummy_weights diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 99fe593b4cb01..3c97b0fe90a81 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -14,6 +14,7 @@ class EngineArgs: tokenizer: Optional[str] = None tokenizer_mode: str = 'auto' trust_remote_code: bool = False + keep_special_tokens: bool = False download_dir: Optional[str] = None use_np_weights: bool = False use_dummy_weights: bool = False @@ -130,6 +131,9 @@ def add_cli_args( parser.add_argument('--disable-log-stats', action='store_true', help='disable logging statistics') + parser.add_argument('--keep-special-tokens', + action='store_true', + help='keep special tokens in the output') return parser @classmethod @@ -146,9 +150,9 @@ def create_engine_configs( # Initialize the configs. model_config = ModelConfig(self.model, self.tokenizer, self.tokenizer_mode, self.trust_remote_code, - self.download_dir, self.use_np_weights, - self.use_dummy_weights, self.dtype, - self.seed) + self.keep_special_tokens, self.download_dir, + self.use_np_weights, self.use_dummy_weights, + self.dtype, self.seed) cache_config = CacheConfig(self.block_size, self.gpu_memory_utilization, self.swap_space) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 4ea443d8451d1..cad1deea1e232 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -628,7 +628,7 @@ def _decode_sequence(self, seq: Sequence) -> None: self.tokenizer, seq.output_tokens, seq.get_last_token_id(), - skip_special_tokens=True, + skip_special_tokens=not self.model_config.keep_special_tokens, ) if new_token is not None: seq.output_tokens.append(new_token) From 10bf5884d89fa007d8cbc7d33c3288f97c6c3154 Mon Sep 17 00:00:00 2001 From: Dan Lord Date: Wed, 6 Sep 2023 12:41:15 -0700 Subject: [PATCH 2/2] Update documentation. --- vllm/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/config.py b/vllm/config.py index f5462dcab9866..2a165edfd70ee 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -22,6 +22,7 @@ class ModelConfig: available, and "slow" will always use the slow tokenizer. trust_remote_code: Trust remote code (e.g., from HuggingFace) when downloading the model and tokenizer. + keep_special_tokens: Keep special tokens in the generated text. download_dir: Directory to download and load the weights, default to the default cache directory of huggingface. use_np_weights: Save a numpy copy of model weights for faster loading.