12 changes: 12 additions & 0 deletions tests/full_tests/ci_gsm8k_tests.sh
@@ -135,3 +135,15 @@ echo "Skipping GSM8K on QWEN3-30B-A3B"
# exit -1
# fi
# echo "Test with QWEN3-30B-A3B passed"

# multimodal-support with qwen2.5-vl
echo "Testing Qwen2.5-VL-7B"
echo "VLLM_SKIP_WARMUP=true VLLM_CONTIGUOUS_PA=False PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 \
python -u vllm-gaudi/tests/models/language/generation/generation_mm.py --model-card-path vllm-gaudi/tests/full_tests/model_cards/qwen2.5-vl-7b.yaml"
VLLM_SKIP_WARMUP=true VLLM_CONTIGUOUS_PA=False PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 \
python -u vllm-gaudi/tests/models/language/generation/generation_mm.py --model-card-path vllm-gaudi/tests/full_tests/model_cards/qwen2.5-vl-7b.yaml
if [ $? -ne 0 ]; then
echo "Error: Test failed for multimodal-support with qwen2.5-vl-7b" >&2
exit -1
fi
echo "Test with multimodal-support with qwen2.5-vl-7b passed"
20 changes: 20 additions & 0 deletions tests/full_tests/model_cards/qwen2.5-vl-7b.yaml
@@ -0,0 +1,20 @@
model_name: "Qwen/Qwen2.5-VL-7B-Instruct"
test_config: # List of test configurations; each entry exercises one modality
- modality: image # modality under test (currently supports image and video)
extra_engine_args: # Optional extra arguments for the engine
mm_processor_kwargs:
min_pixels: 784
max_pixels: 1003520
fps: 1
input_data_config: # Configuration for the input data
num_prompts: 4 # Number of samples to run
media_source: default # Source of the data to load
- modality: video
extra_engine_args:
mm_processor_kwargs:
min_pixels: 784
max_pixels: 1003520
fps: 1
input_data_config:
num_prompts: 2
media_source: default
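
For reference, a minimal sketch of how this card is consumed (it mirrors start_test in generation_mm.py below; all keys and defaults come from that file):

    import yaml

    with open("tests/full_tests/model_cards/qwen2.5-vl-7b.yaml") as f:
        model_card = yaml.safe_load(f)

    model_name = model_card.get("model_name", "Qwen/Qwen2.5-VL-7B-Instruct")
    for config in model_card.get("test_config", []):
        modality = config.get("modality", "image")  # "image" or "video"
        extra_engine_args = config.get("extra_engine_args", {})
        input_data_config = config.get("input_data_config", {})
        num_prompts = input_data_config.get("num_prompts", 1)
        media_source = input_data_config.get("media_source", "default")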
184 changes: 184 additions & 0 deletions tests/models/language/generation/generation_mm.py
@@ -0,0 +1,184 @@
from argparse import ArgumentParser
from dataclasses import asdict
from typing import Union

import yaml
from PIL import Image

from vllm import LLM, EngineArgs, SamplingParams
from vllm.assets.image import ImageAsset
from vllm.assets.video import VideoAsset
from vllm.multimodal.image import convert_image_mode

from vllm_gaudi.extension.logger import logger as init_logger

logger = init_logger()


class PromptData:
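    # Question pools per modality; get_prompts() cycles through a pool
    # (i % len(questions)) when num_prompts exceeds its length.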
_questions = {
"image": [
"What is the most prominent object in this image?",
"Describe the scene in the image.",
"What is the weather like in the image?",
"Write a short poem about this image."
],
"video": [
"Describe this video",
"Which movie would you associate this video with?"
]
}

    # Lazy media loaders: "default" pulls a bundled vLLM asset; any other
    # value is treated as a path to local media.
    _data = {
"image":
lambda source: convert_image_mode(
ImageAsset("cherry_blossom").pil_image
if source == "default" else Image.open(source), "RGB"),
"video":
lambda source: VideoAsset(name="baby_reading"
if source == "default" else source,
num_frames=16).np_ndarrays
}

    def get_prompts(self,
                    modality: str = "image",
                    media_source: str = "default",
                    num_prompts: int = 1,
                    skip_vision_data: bool = False):
if modality == "image":
pholder = "<|image_pad|>"
elif modality == "video":
pholder = "<|video_pad|>"
else:
raise ValueError(f"Unsupported modality: {modality}."
" Supported modality: [image, video]")
questions = self._questions[modality]
prompts = [
("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
f"<|im_start|>user\n<|vision_start|>{pholder}<|vision_end|>"
f"{question}<|im_end|>\n"
"<|im_start|>assistant\n") for question in questions
]

data = self._data[modality](media_source)
inputs = [{
"prompt": prompts[i % len(prompts)],
"multi_modal_data": {
modality: data
},
} if not skip_vision_data else {
"prompt": questions[i % len(questions)],
} for i in range(num_prompts)]

return inputs
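
# Example usage (hypothetical values): PromptData().get_prompts(
#     modality="image", num_prompts=4) yields four request dicts, each
#     pairing a chat-templated question with the shared image asset under
#     multi_modal_data["image"].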


def run_model(model_name: str, inputs: Union[dict, list[dict]], modality: str,
**extra_engine_args):
    # Fill in default mm_processor_kwargs; values supplied by the model
    # card take precedence.
    mm_processor_kwargs = extra_engine_args.setdefault(
        "mm_processor_kwargs", {})
    mm_processor_kwargs.setdefault("min_pixels", 28 * 28)
    mm_processor_kwargs.setdefault("max_pixels", 1280 * 28 * 28)
    mm_processor_kwargs.setdefault("fps", 1)

extra_engine_args.setdefault("max_model_len", 32768)
extra_engine_args.setdefault("max_num_seqs", 5)
extra_engine_args.setdefault("limit_mm_per_prompt", {modality: 1})

    # Greedy decoding with a short completion keeps CI output deterministic.
    sampling_params = SamplingParams(
        temperature=0.0,
        max_tokens=64,
    )

    engine_args = asdict(EngineArgs(model=model_name, **extra_engine_args))
    llm = LLM(**engine_args)

outputs = llm.generate(
inputs,
sampling_params=sampling_params,
use_tqdm=False, # Disable tqdm for CI tests
)
return outputs


def start_test(model_card_path: str):
with open(model_card_path) as f:
model_card = yaml.safe_load(f)

model_name = model_card.get("model_name", "Qwen/Qwen2.5-VL-7B-Instruct")
test_config = model_card.get("test_config", [])
if not test_config:
logger.warning("No test configurations found.")
return

for config in test_config:
modality = "image" # Ensure modality is always defined
try:
modality = config.get("modality", "image")
extra_engine_args = config.get("extra_engine_args", {})
input_data_config = config.get("input_data_config", {})
num_prompts = input_data_config.get("num_prompts", 1)
media_source = input_data_config.get("media_source", "default")

logger.info(
"================================================\n"
"Running test with configs:\n"
"modality: %(modality)s\n"
"input_data_config: %(input_data_config)s\n"
"extra_engine_args: %(extra_engine_args)s\n"
"================================================",
dict(modality=modality,
input_data_config=input_data_config,
extra_engine_args=extra_engine_args))

            data = PromptData()
inputs = data.get_prompts(modality=modality,
media_source=media_source,
num_prompts=num_prompts)

logger.info(
"*** Questions for modality %(modality)s: %(questions)s",
dict(modality=modality, questions=data._questions[modality]))
responses = run_model(model_name, inputs, modality,
**extra_engine_args)
for response in responses:
print(f"{response.outputs[0].text}")
print("=" * 80)
except Exception as e:
logger.error("Error during test with modality %(modality)s: %(e)s",
dict(modality=modality, e=e))

raise


def main():
parser = ArgumentParser()
parser.add_argument("--model-card-path",
required=True,
help="Path to .yaml file describing model parameters")
args = parser.parse_args()
start_test(args.model_card_path)


if __name__ == "__main__":
try:
main()
except Exception:
import os
import traceback
print("An error occurred during generation:")
traceback.print_exc()
os._exit(1)