diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index 8f56a6e8a9..8f7f6e68ba 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -179,8 +179,7 @@ jobs: run: | if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py - # guided decoding doesn't work, fix it later - # pytest -sv tests/singlecard/test_guided_decoding.py.py + pytest -sv tests/singlecard/test_guided_decoding.py # test_ascend_config.py should be ran separately because it will regenerate the global config many times. pytest -sv tests/singlecard/test_ascend_config.py pytest -sv tests/singlecard/test_camem.py @@ -216,8 +215,7 @@ jobs: run: | if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py - # guided decoding doesn't work, fix it later - # pytest -sv tests/singlecard/test_guided_decoding.py.py + pytest -sv tests/singlecard/test_guided_decoding.py pytest -sv tests/singlecard/test_camem.py # test_ascend_config.py should be ran separately because it will regenerate the global config many times. pytest -sv tests/singlecard/test_ascend_config.py diff --git a/tests/singlecard/test_guided_decoding.py b/tests/singlecard/test_guided_decoding.py index 0725812a28..1258cf1d41 100644 --- a/tests/singlecard/test_guided_decoding.py +++ b/tests/singlecard/test_guided_decoding.py @@ -34,7 +34,7 @@ "lm-format-enforcer", "xgrammar", ] -GuidedDecodingBackendV1 = ["xgrammar", "guidance:disable-any-whitespace"] +GuidedDecodingBackendV1 = ["xgrammar", "guidance"] GuidedDecodingBackend = list( set(GuidedDecodingBackendV0 + GuidedDecodingBackendV1)) @@ -94,6 +94,10 @@ def test_guided_json_completion(guided_decoding_backend: str, # xgrammar does not support json schema, will fall back to outlines, skip it pytest.skip( f"{guided_decoding_backend} will fall back to outlines, skip it") + if guided_decoding_backend == "outlines": + pytest.skip( + f"{guided_decoding_backend} will take up too much time for json " + "completion, skip it") if guided_decoding_backend not in GuidedDecodingBackendV0 and os.getenv( "VLLM_USE_V1") == "0": # guidance does not support on v0, skip it