Your current environment
Both https://modelscope.cn/models/AIDC-AI/Ovis2-34B and https://modelscope.cn/models/AIDC-AI/Ovis2-16B fail with the same problem.
root@node37:/disk1/Ovis2-16B# more docker-compose.yml
version: '3.3'
services:
  vllm-openai:
    image: vllm/vllm-openai:v0.7.3
    container_name: Ovis2-16B
    restart: always
    runtime: nvidia
    ports:
      - 8007:8000
    volumes:
      - /disk1/:/models
    command: >
      --model /models/Ovis2-16B
      --trust_remote_code
      --tokenizer_mode="auto"
      --dtype=bfloat16
      --max_num_seqs=128
      --tensor_parallel_size=1
      --gpu-memory-utilization=0.9
      --max-model-len=32768
      --served-model-name=Ovis2-16B
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu]
              device_ids: ["3"]
    ipc: host
networks:
  vllm:
root@node37:/disk1/Ovis2-16B#
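For reference, the architecture string vLLM tries to resolve comes from the checkpoint's config.json. A minimal sketch to print it (the host path is an assumption matching the /disk1/ volume mount above):

```python
import json

# Hypothetical host path; corresponds to /models/Ovis2-16B inside the container.
with open("/disk1/Ovis2-16B/config.json") as f:
    cfg = json.load(f)

# Ovis2 checkpoints declare the "Ovis" architecture, which is the name
# appearing in the ValueError in the logs below.
print(cfg.get("architectures"))
```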
root@node37:/disk1/Ovis2-16B# docker compose -f docker-compose.yml down
root@node37:/disk1/Ovis2-16B# docker compose -f docker-compose.yml up -d
[+] Running 2/2
✔ Network ovis2-16b_default Created 0.1s
✔ Container Ovis2-16B Started 0.5s
root@node37:/disk1/Ovis2-16B# docker logs -f Ovis2-16B
INFO 03-02 20:27:49 __init__.py:207] Automatically detected platform cuda.
INFO 03-02 20:27:49 api_server.py:912] vLLM API server version 0.7.3
INFO 03-02 20:27:49 api_server.py:913] args: Namespace(host=None, port=8000, uvicorn_log_level='info', allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, enable_reasoning=False, reasoning_parser=None, tool_call_parser=None, tool_parser_plugin='', model='/models/Ovis2-16B', task='auto', tokenizer=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, download_dir=None, load_format='auto', config_format=<ConfigFormat.AUTO: 'auto'>, dtype='bfloat16', kv_cache_dtype='auto', max_model_len=32768, guided_decoding_backend='xgrammar', logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, max_parallel_loading_workers=None, ray_workers_use_nsight=False, block_size=None, enable_prefix_caching=None, disable_sliding_window=False, use_v2_block_manager=True, num_lookahead_slots=0, seed=0, swap_space=4, cpu_offload_gb=0, gpu_memory_utilization=0.9, num_gpu_blocks_override=None, max_num_batched_tokens=None, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, max_num_seqs=128, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, disable_custom_all_reduce=False, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config=None, limit_mm_per_prompt=None, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=False, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=False, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', num_scheduler_steps=1, multi_step_stream_outputs=True, scheduler_delay_factor=0.0, enable_chunked_prefill=None, speculative_model=None, speculative_model_quantization=None, num_speculative_tokens=None, speculative_disable_mqa_scorer=False, speculative_draft_tensor_parallel_size=None, speculative_max_model_len=None, speculative_disable_by_batch_size=None, ngram_prompt_lookup_max=None, ngram_prompt_lookup_min=None, spec_decoding_acceptance_method='rejection_sampler', typical_acceptance_sampler_posterior_threshold=None, typical_acceptance_sampler_posterior_alpha=None, disable_logprobs_during_spec_decoding=None, model_loader_extra_config=None, ignore_patterns=[], preemption_mode=None, served_model_name=['Ovis2-16B'], qlora_adapter_name_or_path=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, scheduling_policy='fcfs', scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', generation_config=None, override_generation_config=None, enable_sleep_mode=False, calculate_kv_scales=False, additional_config=None, disable_log_requests=False,
enable_prompt_tokens_details=False)
INFO 03-02 20:27:49 api_server.py:209] Started engine process with PID 45
INFO 03-02 20:27:55 __init__.py:207] Automatically detected platform cuda.
INFO 03-02 20:27:58 config.py:549] This model supports multiple tasks: {'score', 'reward', 'generate', 'classify', 'embed'}. Defaulting to 'generate'.
INFO 03-02 20:28:03 config.py:549] This model supports multiple tasks: {'score', 'embed', 'classify', 'reward', 'generate'}. Defaulting to 'generate'.
INFO 03-02 20:28:03 llm_engine.py:234] Initializing a V0 LLM engine (v0.7.3) with config: model='/models/Ovis2-16B', speculative_config=None, tokenizer='/models/Ovis2-16B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=Ovis2-16B, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=False, chunked_prefill_enabled=False, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"splitting_ops":[],"compile_sizes":[],"cudagraph_capture_sizes":[128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":128}, use_cached_outputs=True,
INFO 03-02 20:28:05 model_runner.py:1110] Starting to load model /models/Ovis2-16B...
Process SpawnProcess-1:
ERROR 03-02 20:28:05 engine.py:400] Ovis has no vLLM implementation and the Transformers implementation is not compatible with vLLM.
ERROR 03-02 20:28:05 engine.py:400] Traceback (most recent call last):
ERROR 03-02 20:28:05 engine.py:400] File "/usr/local/lib/python3.12/dist-packages/vllm/engine/multiprocessing/engine.py", line 391, in run_mp_engine
ERROR 03-02 20:28:05 engine.py:400] engine = MQLLMEngine.from_engine_args(engine_args=engine_args,
ERROR 03-02 20:28:05 engine.py:400] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 03-02 20:28:05 engine.py:400] File "/usr/local/lib/python3.12/dist-packages/vllm/engine/multiprocessing/engine.py", line 124, in from_engine_args
ERROR 03-02 20:28:05 engine.py:400] return cls(ipc_path=ipc_path,
ERROR 03-02 20:28:05 engine.py:400] ^^^^^^^^^^^^^^^^^^^^^^
ERROR 03-02 20:28:05 engine.py:400] File "/usr/local/lib/python3.12/dist-packages/vllm/engine/multiprocessing/engine.py", line 76, in init
ERROR 03-02 20:28:05 engine.py:400] self.engine = LLMEngine(*args, **kwargs)
ERROR 03-02 20:28:05 engine.py:400] ^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 03-02 20:28:05 engine.py:400] File "/usr/local/lib/python3.12/dist-packages/vllm/engine/llm_engine.py", line 273, in init
ERROR 03-02 20:28:05 engine.py:400] self.model_executor = executor_class(vllm_config=vllm_config, )
ERROR 03-02 20:28:05 engine.py:400] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 03-02 20:28:05 engine.py:400] File "/usr/local/lib/python3.12/dist-packages/vllm/executor/executor_base.py", line 52, in init
ERROR 03-02 20:28:05 engine.py:400] self._init_executor()
ERROR 03-02 20:28:05 engine.py:400] File "/usr/local/lib/python3.12/dist-packages/vllm/executor/uniproc_executor.py", line 47, in _init_executor
ERROR 03-02 20:28:05 engine.py:400] self.collective_rpc("load_model")
ERROR 03-02 20:28:05 engine.py:400] File "/usr/local/lib/python3.12/dist-packages/vllm/executor/uniproc_executor.py", line 56, in collective_rpc
ERROR 03-02 20:28:05 engine.py:400] answer = run_method(self.driver_worker, method, args, kwargs)
ERROR 03-02 20:28:05 engine.py:400] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 03-02 20:28:05 engine.py:400] File "/usr/local/lib/python3.12/dist-packages/vllm/utils.py", line 2196, in run_method
ERROR 03-02 20:28:05 engine.py:400] return func(*args, **kwargs)
ERROR 03-02 20:28:05 engine.py:400] ^^^^^^^^^^^^^^^^^^^^^
ERROR 03-02 20:28:05 engine.py:400] File "/usr/local/lib/python3.12/dist-packages/vllm/worker/worker.py", line 183, in load_model
ERROR 03-02 20:28:05 engine.py:400] self.model_runner.load_model()
ERROR 03-02 20:28:05 engine.py:400] File "/usr/local/lib/python3.12/dist-packages/vllm/worker/model_runner.py", line 1112, in load_model
ERROR 03-02 20:28:05 engine.py:400] self.model = get_model(vllm_config=self.vllm_config)
ERROR 03-02 20:28:05 engine.py:400] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 03-02 20:28:05 engine.py:400] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/init.py", line 14, in get_model
ERROR 03-02 20:28:05 engine.py:400] return loader.load_model(vllm_config=vllm_config)
ERROR 03-02 20:28:05 engine.py:400] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 03-02 20:28:05 engine.py:400] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/loader.py", line 406, in load_model
ERROR 03-02 20:28:05 engine.py:400] model = _initialize_model(vllm_config=vllm_config)
ERROR 03-02 20:28:05 engine.py:400] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 03-02 20:28:05 engine.py:400] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/loader.py", line 115, in _initialize_model
ERROR 03-02 20:28:05 engine.py:400] model_class, _ = get_model_architecture(model_config)
ERROR 03-02 20:28:05 engine.py:400] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 03-02 20:28:05 engine.py:400] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/utils.py", line 106, in get_model_architecture
ERROR 03-02 20:28:05 engine.py:400] architectures = resolve_transformers_fallback(model_config,
ERROR 03-02 20:28:05 engine.py:400] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 03-02 20:28:05 engine.py:400] File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/utils.py", line 75, in resolve_transformers_fallback
ERROR 03-02 20:28:05 engine.py:400] raise ValueError(
ERROR 03-02 20:28:05 engine.py:400] ValueError: Ovis has no vLLM implementation and the Transformers implementation is not compatible with vLLM.
Traceback (most recent call last):
File "/usr/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
self.run()
File "/usr/lib/python3.12/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/local/lib/python3.12/dist-packages/vllm/engine/multiprocessing/engine.py", line 402, in run_mp_engine
raise e
File "/usr/local/lib/python3.12/dist-packages/vllm/engine/multiprocessing/engine.py", line 391, in run_mp_engine
engine = MQLLMEngine.from_engine_args(engine_args=engine_args,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/engine/multiprocessing/engine.py", line 124, in from_engine_args
return cls(ipc_path=ipc_path,
^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/engine/multiprocessing/engine.py", line 76, in init
self.engine = LLMEngine(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/engine/llm_engine.py", line 273, in init
self.model_executor = executor_class(vllm_config=vllm_config, )
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/executor/executor_base.py", line 52, in init
self._init_executor()
File "/usr/local/lib/python3.12/dist-packages/vllm/executor/uniproc_executor.py", line 47, in _init_executor
self.collective_rpc("load_model")
File "/usr/local/lib/python3.12/dist-packages/vllm/executor/uniproc_executor.py", line 56, in collective_rpc
answer = run_method(self.driver_worker, method, args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/utils.py", line 2196, in run_method
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/worker/worker.py", line 183, in load_model
self.model_runner.load_model()
File "/usr/local/lib/python3.12/dist-packages/vllm/worker/model_runner.py", line 1112, in load_model
self.model = get_model(vllm_config=self.vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/init.py", line 14, in get_model
return loader.load_model(vllm_config=vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/loader.py", line 406, in load_model
model = _initialize_model(vllm_config=vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/loader.py", line 115, in _initialize_model
model_class, _ = get_model_architecture(model_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/utils.py", line 106, in get_model_architecture
architectures = resolve_transformers_fallback(model_config,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/utils.py", line 75, in resolve_transformers_fallback
raise ValueError(
ValueError: Ovis has no vLLM implementation and the Transformers implementation is not compatible with vLLM.
[rank0]:[W302 20:28:06.955876472 ProcessGroupNCCL.cpp:1250] Warning: WARNING: process group has NOT been destroyed before we destruct ProcessGroupNCCL. On normal program exit, the application should call destroy_process_group to ensure that any pending NCCL operations have finished in this process. In rare cases this process can exit before this point and block the progress of another member of the process group. This constraint has always been present, but this warning has only been added since PyTorch 2.4 (function operator())
Traceback (most recent call last):
File "", line 198, in _run_module_as_main
File "", line 88, in _run_code
File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 991, in
uvloop.run(run_server(args))
File "/usr/local/lib/python3.12/dist-packages/uvloop/init.py", line 109, in run
return __asyncio.run(
^^^^^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/runners.py", line 195, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
File "/usr/local/lib/python3.12/dist-packages/uvloop/init.py", line 61, in wrapper
return await main
^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 947, in run_server
async with build_async_engine_client(args) as engine_client:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/contextlib.py", line 210, in aenter
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 139, in build_async_engine_client
async with build_async_engine_client_from_engine_args(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/contextlib.py", line 210, in aenter
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 233, in build_async_engine_client_from_engine_args
raise RuntimeError(
RuntimeError: Engine process failed to start. See stack trace for the root cause.
root@node37:/disk1/Ovis2-16B#
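The relevant failure is the ValueError above: vLLM 0.7.3 ships no native Ovis implementation, and the checkpoint's remote Transformers code is not compatible with vLLM's Transformers fallback. A quick sketch to confirm which architectures a given vLLM build registers (run inside the vllm/vllm-openai:v0.7.3 image; assumes the `ModelRegistry.get_supported_archs()` helper is present in this version):

```python
# Sketch: list the architectures this vLLM build can serve natively.
from vllm import ModelRegistry

archs = ModelRegistry.get_supported_archs()
print("Ovis" in archs)     # False on v0.7.3, which triggers the ValueError above
print(sorted(archs)[:10])  # sample of what this build does support
```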
🐛 Describe the bug
vLLM 0.7.3 cannot load the Ovis2 checkpoints: the engine process dies during model load with `ValueError: Ovis has no vLLM implementation and the Transformers implementation is not compatible with vLLM.` Related feedback thread: https://modelscope.cn/models/AIDC-AI/Ovis2-16B/feedback
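Until Ovis support lands in vLLM, the checkpoint can still be loaded through plain Transformers via its bundled remote code. A minimal sketch, assuming the standard `trust_remote_code` loading pattern; the exact multimodal generation API is defined by the model's custom code, so consult the AIDC-AI/Ovis2-16B model card for full usage:

```python
import torch
from transformers import AutoModelForCausalLM

# Sketch only: bypasses vLLM and loads the checkpoint with its own
# remote modeling code. Path matches the container mount used above.
model = AutoModelForCausalLM.from_pretrained(
    "/models/Ovis2-16B",       # or the ModelScope/Hugging Face repo id
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,    # required: Ovis ships custom modeling code
).cuda()
```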
Before submitting a new issue...
- Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the documentation page, which can answer lots of frequently asked questions.

