3 files changed: +7 −2 lines
.buildkite/scripts/hardware_ci

@@ -19,6 +19,7 @@ docker run --privileged --net host --shm-size=16G -it \
     vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \
     && python3 -m pip install pytest pytest-asyncio tpu-info \
     && python3 -m pip install lm_eval[api]==0.4.4 \
+    && export VLLM_XLA_CACHE_PATH= \
     && export VLLM_USE_V1=1 \
     && export VLLM_XLA_CHECK_RECOMPILATION=1 \
     && echo HARDWARE \
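Setting VLLM_XLA_CACHE_PATH to an empty string disables the persistent XLA
compilation cache, so the VLLM_XLA_CHECK_RECOMPILATION=1 flag on the next
line catches real runtime compilations instead of cache hits. As a minimal
sketch of this empty-string-means-disabled convention (the resolver name
and default path are hypothetical; vLLM's actual handling of the variable
may differ):

import os

def resolve_xla_cache_path(default: str = "~/.cache/vllm/xla_cache"):
    # "export VLLM_XLA_CACHE_PATH=" yields an empty string, which is falsy,
    # so the on-disk cache is disabled and every compilation happens fresh.
    value = os.environ.get("VLLM_XLA_CACHE_PATH", default)
    return os.path.expanduser(value) if value else None

The two test diffs below make the suite compatible with this check by
compiling the model on TPU instead of forcing eager execution.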
@@ -13,6 +13,7 @@

 from vllm.entrypoints.llm import LLM
 from vllm.outputs import RequestOutput
+from vllm.platforms import current_platform
 from vllm.sampling_params import GuidedDecodingParams, SamplingParams

 PARAMS_MODELS_BACKENDS_TOKENIZER_MODE = [

@@ -63,10 +64,13 @@ def test_structured_output(
 ):
     monkeypatch.setenv("VLLM_USE_V1", "1")

+    # Don't use eager execution on TPUs because we want to test for no
+    # recompilation at runtime
+    enforce_eager = bool(not current_platform.is_tpu())
     # Use a single LLM instance for several scenarios to
     # speed up the test suite.
     llm = LLM(model=model_name,
-              enforce_eager=True,
+              enforce_eager=enforce_eager,
               max_model_len=1024,
               guided_decoding_backend=guided_decoding_backend,
               tokenizer_mode=tokenizer_mode)
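The added comment states the intent: on TPU the model must be compiled once
up front and must not recompile at runtime, which eager execution would
hide. As a rough illustration (not the PR's code) of how runtime
recompilation can be detected on TPU, torch_xla's debug metrics expose a
CompileTime metric whose sample count grows with each XLA compilation:

# Illustrative only; requires an XLA environment with torch_xla installed.
import torch_xla.debug.metrics as met

def xla_compile_count() -> int:
    # metric_data returns (num_samples, accumulator, samples), or None if
    # the metric has not been recorded yet.
    data = met.metric_data("CompileTime")
    return data[0] if data is not None else 0

baseline = xla_compile_count()
# ... run generation against the already-warmed-up LLM here ...
assert xla_compile_count() == baseline, "unexpected XLA recompilation"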
@@ -23,7 +23,7 @@ def test_sampler_different(model_name: str):
     different results.
     """
     llm = LLM(model_name,
-              enforce_eager=True,
+              enforce_eager=False,
               max_num_seqs=1,
               max_model_len=512,
               max_num_batched_tokens=512)
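Here enforce_eager is hard-coded to False rather than gated on the
platform, which fits a TPU-only test: the compiled path is always
exercised. A test that must also run on other platforms could instead gate
the flag the same way the structured-output test above does:

# Sketch of the platform-gated variant, mirroring the previous diff.
from vllm.platforms import current_platform

# Compile on TPU so recompilation can be detected; stay eager elsewhere
# to keep startup cheap.
enforce_eager = not current_platform.is_tpu()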