
Commit c2e1332

russellb and NickLucche authored and committed
[v1] Add Whisper model support (encoder-decoder) (vllm-project#21088)
Signed-off-by: Russell Bryant <rbryant@redhat.com>
Co-authored-by: NickLucche <nlucches@redhat.com>
Signed-off-by: xuebwang-amd <xuebwang@amd.com>

1 parent 215180c · commit c2e1332

File tree

Showing 31 changed files with 429 additions and 92 deletions.
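
What this enables, end to end: Whisper now runs as an encoder-decoder model on the V1 engine. A minimal offline-transcription sketch, modeled on the example script updated below — the AudioAsset helper and the "<|startoftranscript|>" prompt format are taken from vLLM's Whisper examples, and the spawn setting mirrors this commit's test changes:

    import os

    # Whisper on V1 currently needs spawn-based workers (see the pipeline
    # and test changes below).
    os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

    from vllm import LLM, SamplingParams
    from vllm.assets.audio import AudioAsset

    llm = LLM(
        model="openai/whisper-large-v3-turbo",
        max_model_len=448,  # matches the example script in this commit
    )

    # Any (waveform, sample_rate) tuple works as the audio input.
    audio = AudioAsset("mary_had_lamb").audio_and_sample_rate

    outputs = llm.generate(
        {
            "prompt": "<|startoftranscript|>",
            "multi_modal_data": {"audio": audio},
        },
        SamplingParams(temperature=0, max_tokens=200),
    )
    print(outputs[0].outputs[0].text)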

.buildkite/test-pipeline.yaml

Lines changed: 3 additions & 3 deletions
@@ -321,7 +321,6 @@ steps:
   - python3 offline_inference/vision_language_pooling.py --seed 0
   - python3 offline_inference/vision_language_multi_image.py --seed 0
   - VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
-  - python3 offline_inference/encoder_decoder.py
   - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
   - python3 offline_inference/basic/classify.py
   - python3 offline_inference/basic/embed.py
@@ -644,7 +643,7 @@ steps:
   - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
   - pip freeze | grep -E 'torch'
   - pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/processing
-  - cd .. && pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model # Otherwise, mp_method="spawn" doesn't work
+  - cd .. && VLLM_WORKER_MULTIPROC_METHOD=spawn pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model # Otherwise, mp_method="spawn" doesn't work
 
 - label: Multi-Modal Models Test (Extended) 1
   mirror_hardwares: [amdexperimental]
@@ -818,7 +817,8 @@ steps:
   # Avoid importing model tests that cause CUDA reinitialization error
   - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
   - pytest models/language -v -s -m 'distributed(num_gpus=2)'
-  - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)'
+  - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py
+  - VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)'
   # test sequence parallel
   - pytest -v -s distributed/test_sequence_parallel.py
   # this test fails consistently.
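
The VLLM_WORKER_MULTIPROC_METHOD=spawn prefix in these steps works around the standard CUDA-and-fork constraint: once the parent process has initialized CUDA, a forked child cannot re-initialize it, so worker processes must be spawned instead. A standalone illustration of the failure mode (not vLLM code, just the underlying behavior, assuming a CUDA-capable torch install):

    import multiprocessing as mp

    def use_gpu() -> None:
        import torch
        torch.zeros(1, device="cuda")  # initializes CUDA in the child

    if __name__ == "__main__":
        import torch
        torch.zeros(1, device="cuda")  # parent touches CUDA first
        # With "fork", the child inherits a poisoned CUDA context and fails;
        # "spawn" starts a fresh interpreter and succeeds.
        ctx = mp.get_context("spawn")
        p = ctx.Process(target=use_gpu)
        p.start()
        p.join()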

examples/offline_inference/encoder_decoder.py

Lines changed: 2 additions & 0 deletions
@@ -5,6 +5,8 @@
 encoder/decoder models, specifically BART and mBART.
 
 This script is refactored to allow model selection via command-line arguments.
+
+NOTE: This example is not yet supported in V1.
 """
 
 import argparse
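
Until BART/mBART land in V1, running this example presumably means pinning the V0 engine, mirroring the VLLM_USE_V1=0 usage in the pipeline step above. A sketch, assuming the flag must be set before vLLM is imported:

    import os

    # Assumption: engine selection reads this at import/startup time.
    os.environ["VLLM_USE_V1"] = "0"

    from vllm import LLM  # noqa: E402

    llm = LLM(model="facebook/bart-large-cnn")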

examples/offline_inference/encoder_decoder_multimodal.py

Lines changed: 3 additions & 0 deletions
@@ -5,6 +5,7 @@
 the explicit/implicit prompt format on enc-dec LMMs for text generation.
 """
 
+import os
 import time
 from collections.abc import Sequence
 from dataclasses import asdict
@@ -130,6 +131,8 @@ def run_mllama():
 
 
 def run_whisper():
+    os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
+
     engine_args = EngineArgs(
         model="openai/whisper-large-v3-turbo",
         max_model_len=448,
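
The truncated EngineArgs(...) above pairs with the from dataclasses import asdict visible in the first hunk: the example scripts presumably build an EngineArgs dataclass and expand it into LLM's keyword arguments. A sketch of that pattern:

    from dataclasses import asdict

    from vllm import LLM, EngineArgs

    engine_args = EngineArgs(
        model="openai/whisper-large-v3-turbo",
        max_model_len=448,
    )
    # Expand the dataclass into LLM's keyword arguments.
    llm = LLM(**asdict(engine_args))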

tests/encoder_decoder/test_e2e_correctness.py

Lines changed: 1 addition & 0 deletions
@@ -63,6 +63,7 @@ def clear_cache():
     current_platform.is_cpu(),
     reason="CPU backend is not currently supported with encoder/decoder models"
 )
+@pytest.mark.skip(reason="bart not supported in V1")
 def test_encoder_decoder_e2e(
     hf_runner,
     vllm_runner,

tests/entrypoints/openai/test_encoder_decoder.py

Lines changed: 1 addition & 0 deletions
@@ -30,6 +30,7 @@ async def client(server):
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.skip(reason="bart is not yet supported in V1")
 async def test_single_completion(client: openai.AsyncOpenAI, model_name: str):
     completion = await client.completions.create(model=model_name,
                                                  prompt="Hello, my name is",

tests/models/language/generation/test_bart.py

Lines changed: 2 additions & 0 deletions
@@ -178,6 +178,7 @@ def run_test(
 @pytest.mark.parametrize("max_tokens", [64])
 @pytest.mark.parametrize("num_logprobs", [5])
 @pytest.mark.parametrize("decoder_prompt_type", list(DecoderPromptType))
+@pytest.mark.skip(reason="bart not supported in V1")
 def test_models(hf_runner, vllm_runner, example_encoder_decoder_prompts, model,
                 dtype, max_tokens, num_logprobs, decoder_prompt_type) -> None:
 
@@ -201,6 +202,7 @@ def test_models(hf_runner, vllm_runner, example_encoder_decoder_prompts, model,
 @pytest.mark.parametrize("max_tokens", [64])
 @pytest.mark.parametrize("num_logprobs", [5])
 @pytest.mark.parametrize("decoder_prompt_type", [DecoderPromptType.CUSTOM])
+@pytest.mark.skip(reason="bart not supported in V1")
 def test_models_distributed(hf_runner, vllm_runner,
                             example_encoder_decoder_prompts,
                             distributed_executor_backend, model, dtype,

tests/models/multimodal/generation/test_whisper.py

Lines changed: 1 addition & 2 deletions
@@ -122,8 +122,7 @@ def run_test(
 
 
 @pytest.mark.core_model
-@pytest.mark.parametrize(
-    "model", ["openai/whisper-small", "openai/whisper-large-v3-turbo"])
+@pytest.mark.parametrize("model", ["openai/whisper-large-v3-turbo"])
 @create_new_process_for_each_test()
 def test_models(vllm_runner, model) -> None:
     run_test(

tests/models/multimodal/processing/test_tensor_schema.py

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@
 
 ARCH_TO_SKIP = {
     "MolmoForCausalLM": "incompatible requirements",
+    "Florence2ForConditionalGeneration": "not supported in V1",
 }
 ARCH_NEEDS_EXTRAS = [
     "InternVLChatModel",

tests/models/test_initialization.py

Lines changed: 6 additions & 0 deletions
@@ -68,6 +68,12 @@ def _initialize_kv_caches_v1(self, vllm_config):
             # has cc==8.9 which hasn't supported FA3 yet. Remove this hack when
             # L4 supports FA3.
             m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN_VLLM_V1")
+        if model_arch == "Florence2ForConditionalGeneration":
+            # An encoder-decoder model that's V0-only. Just skip it
+            # since V0 is about to be removed.
+            pytest.skip("Skipping Florence2ForConditionalGeneration")
+        if model_arch == "WhisperForConditionalGeneration":
+            m.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
         LLM(
             model_info.default,
             tokenizer=model_info.tokenizer,
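
The m.setenv calls rely on pytest's monkeypatch context manager, which restores the environment when the block exits, so the spawn requirement doesn't leak into other tests in the session. A generic sketch of the pattern:

    import os

    import pytest

    def test_spawn_env_is_scoped(monkeypatch: pytest.MonkeyPatch) -> None:
        with monkeypatch.context() as m:
            m.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
            assert os.environ["VLLM_WORKER_MULTIPROC_METHOD"] == "spawn"
        # On exit, the previous value (or absence) is restored automatically.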

tests/v1/test_oracle.py

Lines changed: 0 additions & 1 deletion
@@ -10,7 +10,6 @@
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 
 UNSUPPORTED_MODELS_V1 = [
-    "openai/whisper-large-v3",  # transcription
     "facebook/bart-large-cnn",  # encoder decoder
 ]
 
