|
| 1 | +# SPDX-License-Identifier: Apache-2.0 |
| 2 | +from contextlib import nullcontext |
| 3 | + |
| 4 | +import pytest |
| 5 | + |
| 6 | +from vllm.entrypoints.llm import LLM |
| 7 | +from vllm.sampling_params import SamplingParams |
| 8 | + |
| 9 | + |
@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
def test_skip_tokenizer_initialization(model: str):
    """Exercise ``LLM.generate`` on an engine built with ``skip_tokenizer_init``.

    The engine is created with ``skip_tokenizer_init=True``. The test then
    checks two things:

    * a plain-text prompt is rejected with a ``ValueError`` whose message
      matches ``"cannot pass text prompts when"``;
    * a ``prompt_token_ids`` prompt produces at least one completion whose
      ``text`` is the empty string and whose ``token_ids`` are non-empty.
    """
    engine = LLM(model=model, skip_tokenizer_init=True, enforce_eager=True)
    params = SamplingParams(prompt_logprobs=True, detokenize=True)

    # Text prompts are expected to be refused on this engine.
    rejects_text_prompt = pytest.raises(
        ValueError,
        match="cannot pass text prompts when",
    )
    with rejects_text_prompt:
        engine.generate("abc", params)

    # Pre-tokenized prompts must still work.
    request_outputs = engine.generate(
        {"prompt_token_ids": [1, 2, 3]},
        sampling_params=params,
    )
    assert len(request_outputs) > 0

    candidates = request_outputs[0].outputs
    assert len(candidates) > 0

    first_candidate = candidates[0]
    # The test expects no decoded text, only generated token ids.
    assert first_candidate.text == ""
    assert first_candidate.token_ids
| 32 | + |
| 33 | + |
@pytest.mark.parametrize("model", ["distilbert/distilgpt2"])
@pytest.mark.parametrize("enable_prompt_embeds", [True, False])
def test_enable_prompt_embeds(hf_runner, model: str,
                              enable_prompt_embeds: bool):
    """Check that ``prompt_embeds`` inputs are gated by ``enable_prompt_embeds``.

    Prompt embeddings are computed with the HF model's input embedding layer
    and passed to ``LLM.generate``. With ``enable_prompt_embeds=True`` the
    call is expected to complete without raising; with ``False`` it is
    expected to raise a ``ValueError`` whose message matches
    ``"set `--enable-prompt-embeds`"``.
    """
    prompt = "abc"

    # Tokenize the prompt and embed it with the HF reference model.
    with hf_runner(model) as hf_model:
        encoded = hf_model.tokenizer(prompt, return_tensors="pt")
        input_ids = encoded.input_ids
        input_ids = input_ids.to(hf_model.model.device)

        embedding = hf_model.model.get_input_embeddings()
        # squeeze(0) removes the leading dimension when it has size 1
        # (presumably the batch axis added by the tokenizer).
        prompt_embeds = embedding(input_ids).squeeze(0)

    # Only the disabled configuration should raise.
    if enable_prompt_embeds:
        expectation = nullcontext()
    else:
        expectation = pytest.raises(
            ValueError,
            match="set `--enable-prompt-embeds`",
        )

    engine = LLM(
        model=model,
        enable_prompt_embeds=enable_prompt_embeds,
        enforce_eager=True,
    )

    with expectation:
        engine.generate({"prompt_embeds": prompt_embeds})
0 commit comments