You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
File "/usr/local/lib/python3.10/dist-packages/vllm/config.py", line 402, in verify_with_parallel_config if total_num_attention_heads % tensor_parallel_size != 0: ZeroDivisionError: [address=0.0.0.0:39009, pid=1112] integer division or modulo by zero
#2303
Closed
1 of 3 tasks
alanOO7 opened this issue
Sep 13, 2024
· 3 comments
2024-09-13 11:23:21,489 transformers.models.auto.image_processing_auto 1112 INFO Could not locate the image processor configuration file, will try to use the model config instead.
2024-09-13 11:23:21,491 xinference.core.worker 108 ERROR Failed to load model qwen2-instruct-GRhW312s-1-0
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/xinference/core/worker.py", line 893, in launch_builtin_model
await model_ref.load()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 656, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 367, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 309, in load
self._model.load()
File "/usr/local/lib/python3.10/dist-packages/xinference/model/llm/vllm/core.py", line 240, in load
self._engine = AsyncLLMEngine.from_engine_args(engine_args)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 726, in from_engine_args
engine_config = engine_args.create_engine_config()
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/arg_utils.py", line 997, in create_engine_config
return EngineConfig(
File "", line 14, in init
File "/usr/local/lib/python3.10/dist-packages/vllm/config.py", line 1863, in post_init
self.model_config.verify_with_parallel_config(self.parallel_config)
File "/usr/local/lib/python3.10/dist-packages/vllm/config.py", line 402, in verify_with_parallel_config
if total_num_attention_heads % tensor_parallel_size != 0:
ZeroDivisionError: [address=0.0.0.0:39009, pid=1112] integer division or modulo by zero
2024-09-13 11:23:21,514 xinference.core.worker 108 ERROR [request 3f6fd318-71fd-11ef-a59f-0242ac110002] Leave launch_builtin_model, error: [address=0.0.0.0:39009, pid=1112] integer division or modulo by zero, elapsed time: 3 s
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/xinference/core/utils.py", line 69, in wrapped
ret = await func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/xinference/core/worker.py", line 893, in launch_builtin_model
await model_ref.load()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 656, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 367, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 309, in load
self._model.load()
File "/usr/local/lib/python3.10/dist-packages/xinference/model/llm/vllm/core.py", line 240, in load
self._engine = AsyncLLMEngine.from_engine_args(engine_args)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 726, in from_engine_args
engine_config = engine_args.create_engine_config()
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/arg_utils.py", line 997, in create_engine_config
return EngineConfig(
File "", line 14, in init
File "/usr/local/lib/python3.10/dist-packages/vllm/config.py", line 1863, in post_init
self.model_config.verify_with_parallel_config(self.parallel_config)
File "/usr/local/lib/python3.10/dist-packages/vllm/config.py", line 402, in verify_with_parallel_config
if total_num_attention_heads % tensor_parallel_size != 0:
ZeroDivisionError: [address=0.0.0.0:39009, pid=1112] integer division or modulo by zero
2024-09-13 11:23:21,520 xinference.api.restful_api 1 ERROR [address=0.0.0.0:39009, pid=1112] integer division or modulo by zero
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/xinference/api/restful_api.py", line 876, in launch_model
model_uid = await (await self._get_supervisor_ref()).launch_builtin_model(
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 656, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 367, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 1026, in launch_builtin_model
await _launch_model()
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 990, in _launch_model
await _launch_one_model(rep_model_uid)
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 969, in _launch_one_model
await worker_ref.launch_builtin_model(
File "xoscar/core.pyx", line 284, in __pyx_actor_method_wrapper
async with lock:
File "xoscar/core.pyx", line 287, in xoscar.core.__pyx_actor_method_wrapper
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/utils.py", line 69, in wrapped
ret = await func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/xinference/core/worker.py", line 893, in launch_builtin_model
await model_ref.load()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 656, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 367, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 309, in load
self._model.load()
File "/usr/local/lib/python3.10/dist-packages/xinference/model/llm/vllm/core.py", line 240, in load
self._engine = AsyncLLMEngine.from_engine_args(engine_args)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 726, in from_engine_args
engine_config = engine_args.create_engine_config()
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/arg_utils.py", line 997, in create_engine_config
return EngineConfig(
File "", line 14, in init
File "/usr/local/lib/python3.10/dist-packages/vllm/config.py", line 1863, in post_init
self.model_config.verify_with_parallel_config(self.parallel_config)
File "/usr/local/lib/python3.10/dist-packages/vllm/config.py", line 402, in verify_with_parallel_config
if total_num_attention_heads % tensor_parallel_size != 0:
ZeroDivisionError: [address=0.0.0.0:39009, pid=1112] integer division or modulo by zero
Running Xinference with Docker? / 是否使用 Docker 运行 Xinference?
docker / docker
pip install / 通过 pip install 安装
installation from source / 从源码安装
Version info / 版本信息
latest
The command used to start Xinference / 用以启动 xinference 的命令
docker run -v /home/mcn/xinference:/root/xinference -e XINFERENCE_HOME=/root/xinference -p 9998:9997 --gpus all xprobe/xinference:latest xinference-local -H 0.0.0.0
Reproduction / 复现过程
qwen2-instruct
vllm
pytorch
7
none
auto
1
点确定后报错
Server error: 500 - [address=0.0.0.0:39009, pid=1112] integer division or modulo by zero
Expected behavior / 期待表现
期待解答
The text was updated successfully, but these errors were encountered:
System Info / 系統信息
docker latest 版本，系统 Ubuntu 24 LTS，宿主机安装了 CUDA 12.4
2024-09-13 11:23:17,673 xinference.core.worker 108 INFO [request 3f6fd318-71fd-11ef-a59f-0242ac110002] Enter launch_builtin_model, args: <xinference.core.worker.WorkerActor object at 0x7a2411034e00>, kwargs: model_uid=qwen2-instruct-GRhW312s-1-0,model_name=qwen2-instruct,model_size_in_billions=7,model_format=pytorch,quantization=none,model_engine=vLLM,model_type=LLM,n_gpu=auto,request_limits=None,peft_model_config=None,gpu_idx=None,download_hub=None,model_path=None
2024-09-13 11:23:21,469 xinference.model.llm.llm_family 108 INFO Caching from Modelscope: qwen/Qwen2-7B-Instruct
2024-09-13 11:23:21,469 xinference.model.llm.llm_family 108 INFO Cache /root/xinference/cache/qwen2-instruct-pytorch-7b exists
/usr/local/lib/python3.10/dist-packages/torch/cuda/init.py:654: UserWarning: Can't initialize NVML
warnings.warn("Can't initialize NVML")
2024-09-13 11:23:21,486 xinference.model.llm.vllm.core 1112 INFO Loading qwen2-instruct-GRhW312s with following model config: {'tokenizer_mode': 'auto', 'trust_remote_code': True, 'tensor_parallel_size': 0, 'block_size': 16, 'swap_space': 4, 'gpu_memory_utilization': 0.9, 'max_num_seqs': 256, 'quantization': None, 'max_model_len': 4096}Enable lora: False. Lora count: 0.
2024-09-13 11:23:21,488 transformers.configuration_utils 1112 INFO loading configuration file /root/xinference/cache/qwen2-instruct-pytorch-7b/config.json
2024-09-13 11:23:21,489 transformers.configuration_utils 1112 INFO Model config Qwen2Config {
"_name_or_path": "/root/xinference/cache/qwen2-instruct-pytorch-7b",
"architectures": [
"Qwen2ForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 151643,
"eos_token_id": 151645,
"hidden_act": "silu",
"hidden_size": 3584,
"initializer_range": 0.02,
"intermediate_size": 18944,
"max_position_embeddings": 32768,
"max_window_layers": 28,
"model_type": "qwen2",
"num_attention_heads": 28,
"num_hidden_layers": 28,
"num_key_value_heads": 4,
"rms_norm_eps": 1e-06,
"rope_theta": 1000000.0,
"sliding_window": null,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.43.4",
"use_cache": true,
"use_sliding_window": false,
"vocab_size": 152064
}
2024-09-13 11:23:21,489 transformers.models.auto.image_processing_auto 1112 INFO Could not locate the image processor configuration file, will try to use the model config instead.
2024-09-13 11:23:21,491 xinference.core.worker 108 ERROR Failed to load model qwen2-instruct-GRhW312s-1-0
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/xinference/core/worker.py", line 893, in launch_builtin_model
await model_ref.load()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 656, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 367, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 309, in load
self._model.load()
File "/usr/local/lib/python3.10/dist-packages/xinference/model/llm/vllm/core.py", line 240, in load
self._engine = AsyncLLMEngine.from_engine_args(engine_args)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 726, in from_engine_args
engine_config = engine_args.create_engine_config()
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/arg_utils.py", line 997, in create_engine_config
return EngineConfig(
File "", line 14, in init
File "/usr/local/lib/python3.10/dist-packages/vllm/config.py", line 1863, in post_init
self.model_config.verify_with_parallel_config(self.parallel_config)
File "/usr/local/lib/python3.10/dist-packages/vllm/config.py", line 402, in verify_with_parallel_config
if total_num_attention_heads % tensor_parallel_size != 0:
ZeroDivisionError: [address=0.0.0.0:39009, pid=1112] integer division or modulo by zero
2024-09-13 11:23:21,514 xinference.core.worker 108 ERROR [request 3f6fd318-71fd-11ef-a59f-0242ac110002] Leave launch_builtin_model, error: [address=0.0.0.0:39009, pid=1112] integer division or modulo by zero, elapsed time: 3 s
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/xinference/core/utils.py", line 69, in wrapped
ret = await func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/xinference/core/worker.py", line 893, in launch_builtin_model
await model_ref.load()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 656, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 367, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 309, in load
self._model.load()
File "/usr/local/lib/python3.10/dist-packages/xinference/model/llm/vllm/core.py", line 240, in load
self._engine = AsyncLLMEngine.from_engine_args(engine_args)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 726, in from_engine_args
engine_config = engine_args.create_engine_config()
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/arg_utils.py", line 997, in create_engine_config
return EngineConfig(
File "", line 14, in init
File "/usr/local/lib/python3.10/dist-packages/vllm/config.py", line 1863, in post_init
self.model_config.verify_with_parallel_config(self.parallel_config)
File "/usr/local/lib/python3.10/dist-packages/vllm/config.py", line 402, in verify_with_parallel_config
if total_num_attention_heads % tensor_parallel_size != 0:
ZeroDivisionError: [address=0.0.0.0:39009, pid=1112] integer division or modulo by zero
2024-09-13 11:23:21,520 xinference.api.restful_api 1 ERROR [address=0.0.0.0:39009, pid=1112] integer division or modulo by zero
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/xinference/api/restful_api.py", line 876, in launch_model
model_uid = await (await self._get_supervisor_ref()).launch_builtin_model(
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 656, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 367, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 1026, in launch_builtin_model
await _launch_model()
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 990, in _launch_model
await _launch_one_model(rep_model_uid)
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 969, in _launch_one_model
await worker_ref.launch_builtin_model(
File "xoscar/core.pyx", line 284, in __pyx_actor_method_wrapper
async with lock:
File "xoscar/core.pyx", line 287, in xoscar.core.__pyx_actor_method_wrapper
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/utils.py", line 69, in wrapped
ret = await func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/xinference/core/worker.py", line 893, in launch_builtin_model
await model_ref.load()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 656, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 367, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 309, in load
self._model.load()
File "/usr/local/lib/python3.10/dist-packages/xinference/model/llm/vllm/core.py", line 240, in load
self._engine = AsyncLLMEngine.from_engine_args(engine_args)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 726, in from_engine_args
engine_config = engine_args.create_engine_config()
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/arg_utils.py", line 997, in create_engine_config
return EngineConfig(
File "", line 14, in init
File "/usr/local/lib/python3.10/dist-packages/vllm/config.py", line 1863, in post_init
self.model_config.verify_with_parallel_config(self.parallel_config)
File "/usr/local/lib/python3.10/dist-packages/vllm/config.py", line 402, in verify_with_parallel_config
if total_num_attention_heads % tensor_parallel_size != 0:
ZeroDivisionError: [address=0.0.0.0:39009, pid=1112] integer division or modulo by zero
Running Xinference with Docker? / 是否使用 Docker 运行 Xinference?
Version info / 版本信息
latest
The command used to start Xinference / 用以启动 xinference 的命令
docker run -v /home/mcn/xinference:/root/xinference -e XINFERENCE_HOME=/root/xinference -p 9998:9997 --gpus all xprobe/xinference:latest xinference-local -H 0.0.0.0
Reproduction / 复现过程
qwen2-instruct
vllm
pytorch
7
none
auto
1
点确定后报错
Server error: 500 - [address=0.0.0.0:39009, pid=1112] integer division or modulo by zero
Expected behavior / 期待表现
期待解答
The text was updated successfully, but these errors were encountered: