Description
Your current environment
Environment: vllm 0.9.1, vllm-dev 0.9.1-dev, torch 2.5.1, CANN 8.2.rc1, 910B3. Scale: 8 nodes × 8 NPUs. (Note: this bug reproduces reliably at a fixed step.)
Launch script:

```shell
python3 -m verl.trainer.main_ppo \
    algorithm.adv_estimator=grpo \
    data.train_files=/Data/gsm8k/train.parquet \
    data.val_files=/Data/gsm8k/test.parquet \
    data.train_batch_size=1024 \
    data.max_prompt_length=512 \
    data.max_response_length=1024 \
    data.filter_overlong_prompts=True \
    data.truncation='error' \
    data.shuffle=False \
    actor_rollout_ref.model.path=/Model/Qwen2.5-3B-Instruct \
    actor_rollout_ref.model.use_shm=True \
    actor_rollout_ref.model.lora_rank=64 \
    actor_rollout_ref.model.lora_alpha=32 \
    actor_rollout_ref.actor.optim.lr=3e-6 \
    actor_rollout_ref.actor.use_torch_compile=False \
    actor_rollout_ref.model.use_remove_padding=True \
    actor_rollout_ref.actor.ppo_mini_batch_size=128 \
    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \
    actor_rollout_ref.actor.use_kl_loss=True \
    actor_rollout_ref.actor.kl_loss_coef=0.001 \
    actor_rollout_ref.actor.kl_loss_type=low_var_kl \
    actor_rollout_ref.actor.entropy_coeff=0 \
    actor_rollout_ref.model.enable_gradient_checkpointing=True \
    actor_rollout_ref.actor.fsdp_config.param_offload=False \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=2 \
    actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
    actor_rollout_ref.rollout.name=vllm \
    actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
    actor_rollout_ref.rollout.n=5 \
    actor_rollout_ref.rollout.load_format=safetensors \
    actor_rollout_ref.rollout.layered_summon=True \
    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=2 \
    actor_rollout_ref.ref.fsdp_config.param_offload=True \
    algorithm.use_kl_in_reward=False \
    trainer.critic_warmup=0 \
    trainer.logger=['console'] \
    trainer.project_name='verl_grpo_example_gsm8k' \
    trainer.experiment_name='qwen2.5_3b_grpo_lora' \
    trainer.n_gpus_per_node=8 \
    trainer.nnodes=8 \
    trainer.save_freq=-1 \
    trainer.test_freq=5 \
    trainer.total_epochs=15 \
    trainer.device=npu $@
```
🐛 Describe the bug
```text
(WorkerDict pid=2086, ip=172.16.2.29) [rank50]:[E717 12:58:15.737425654 compiler_depend.ts:422] call aclnnInplaceCopy failed, detail:EZ1001: [PID: 2086] 2025-07-17-12:58:15.434.621 8462 and 8192 cannot broadcast.
(WorkerDict pid=2086, ip=172.16.2.29) TraceBack (most recent call last):
(WorkerDict pid=2086, ip=172.16.2.29) The size of tensor self [8192] must match the size of tensor src [8462].
(WorkerDict pid=2086, ip=172.16.2.29)
(WorkerDict pid=2086, ip=172.16.2.29) [ERROR] 2025-07-17-12:58:15 (PID:2086, Device:0, RankID:50) ERR01100 OPS call acl api failed
(WorkerDict pid=2086, ip=172.16.2.29) Exception raised from operator() at build/CMakeFiles/torch_npu.dir/compiler_depend.ts:88 (most recent call first):
(WorkerDict pid=2086, ip=172.16.2.29) frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0xb8 (0xffd036eef908 in /home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/torch/lib/libc10.so)
(WorkerDict pid=2086, ip=172.16.2.29) frame #1: c10::detail::torchCheckFail(char const*, char const*, unsigned int, std::string const&) + 0x6c (0xffd036e9e404 in /home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/torch/lib/libc10.so)
(WorkerDict pid=2086, ip=172.16.2.29) frame #2: <unknown function> + 0x6e0968 (0xffd02c7c0968 in /home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/torch_npu/lib/libtorch_npu.so)
(WorkerDict pid=2086, ip=172.16.2.29) frame #3: <unknown function> + 0x1644484 (0xffd02d724484 in /home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/torch_npu/lib/libtorch_npu.so)
(WorkerDict pid=2086, ip=172.16.2.29) frame #4: <unknown function> + 0x78d244 (0xffd02c86d244 in /home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/torch_npu/lib/libtorch_npu.so)
(WorkerDict pid=2086, ip=172.16.2.29) frame #5: <unknown function> + 0x78da58 (0xffd02c86da58 in /home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/torch_npu/lib/libtorch_npu.so)
(WorkerDict pid=2086, ip=172.16.2.29) frame #6: <unknown function> + 0x78a1cc (0xffd02c86a1cc in /home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/torch_npu/lib/libtorch_npu.so)
(WorkerDict pid=2086, ip=172.16.2.29) frame #7: <unknown function> + 0xbea5c (0xffffaeb5da5c in /usr/lib64/libstdc++.so.6)
(WorkerDict pid=2086, ip=172.16.2.29) frame #8: <unknown function> + 0x87c4 (0xffffbdbc07c4 in /usr/lib64/libpthread.so.0)
(WorkerDict pid=2086, ip=172.16.2.29) frame #9: <unknown function> + 0xdbcec (0xffffbd9facec in /usr/lib64/libc.so.6)
(WorkerDict pid=2086, ip=172.16.2.29)
(TaskRunner pid=12658) Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_generate_sequences() (pid=22436, ip=172.16.1.46, actor_id=7f12ab1126908dcab67a05a101000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0xffcfcbfd1330>)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/verl/single_controller/ray/base.py", line 663, in func
(TaskRunner pid=12658)     return getattr(self.worker_dict[key], name)(*args, **kwargs)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/verl/single_controller/base/decorator.py", line 540, in inner
(TaskRunner pid=12658)     return func(*args, **kwargs)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/verl/workers/fsdp_workers.py", line 662, in generate_sequences
(TaskRunner pid=12658)     output = self.rollout.generate_sequences(prompts=prompts)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/verl/utils/debug/performance.py", line 78, in f
(TaskRunner pid=12658)     return self.log(decorated_function, *args, **kwargs)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/verl/utils/debug/performance.py", line 88, in log
(TaskRunner pid=12658)     output = func(*args, **kwargs)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(TaskRunner pid=12658)     return func(*args, **kwargs)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py", line 269, in generate_sequences
(TaskRunner pid=12658)     outputs = self.inference_engine.generate(
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/utils.py", line 1267, in inner
(TaskRunner pid=12658)     return fn(*args, **kwargs)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 474, in generate
(TaskRunner pid=12658)     outputs = self._run_engine(use_tqdm=use_tqdm)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 1517, in _run_engine
(TaskRunner pid=12658)     step_outputs = self.llm_engine.step()
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/engine/llm_engine.py", line 1352, in step
(TaskRunner pid=12658)     outputs = self.model_executor.execute_model(
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/executor/executor_base.py", line 141, in execute_model
(TaskRunner pid=12658)     output = self.collective_rpc("execute_model",
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/executor/uniproc_executor.py", line 57, in collective_rpc
(TaskRunner pid=12658)     answer = run_method(self.driver_worker, method, args, kwargs)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/utils.py", line 2671, in run_method
(TaskRunner pid=12658)     return func(*args, **kwargs)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/worker/worker_base.py", line 421, in execute_model
(TaskRunner pid=12658)     output = self.model_runner.execute_model(
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(TaskRunner pid=12658)     return func(*args, **kwargs)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm_ascend/worker/model_runner.py", line 1344, in execute_model
(TaskRunner pid=12658)     self.set_active_loras(model_input.lora_requests,
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm_ascend/worker/model_runner.py", line 1218, in set_active_loras
(TaskRunner pid=12658)     self.lora_manager.set_active_adapters(lora_requests, lora_mapping)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/lora/worker_manager.py", line 170, in set_active_adapters
(TaskRunner pid=12658)     set_active_adapters_worker(requests, mapping, self._apply_adapters,
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/adapter_commons/utils.py", line 56, in set_active_adapters_worker
(TaskRunner pid=12658)     set_adapter_mapping_func(mapping)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/lora/models.py", line 706, in set_adapter_mapping
(TaskRunner pid=12658)     self._last_mapping = set_adapter_mapping(mapping, self._last_mapping,
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/adapter_commons/utils.py", line 31, in set_adapter_mapping
(TaskRunner pid=12658)     set_mapping_func(mapping)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/lora/models.py", line 471, in _set_adapter_mapping
(TaskRunner pid=12658)     self.punica_wrapper.update_metadata(
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/lora/punica_wrapper/punica_base.py", line 339, in update_metadata
(TaskRunner pid=12658)     self._update_prefill_metadata(self.token_lora_indices)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/lora/punica_wrapper/punica_base.py", line 216, in _update_prefill_metadata
(TaskRunner pid=12658)     no_lora) = compute_meta(token_lora_tensor)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/lora/punica_wrapper/utils.py", line 26, in compute_meta
(TaskRunner pid=12658)     lora_indices_tensor, seq_length_tensor = torch.unique_consecutive(
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/torch/_jit_internal.py", line 624, in fn
(TaskRunner pid=12658)     return if_false(*args, **kwargs)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/torch/_jit_internal.py", line 622, in fn
(TaskRunner pid=12658)     return if_true(*args, **kwargs)
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/torch/functional.py", line 1143, in _consecutive_return_counts
(TaskRunner pid=12658)     output, _, counts = _unique_consecutive_impl(
(TaskRunner pid=12658)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/torch/functional.py", line 1041, in _unique_consecutive_impl
(TaskRunner pid=12658)     output, inverse_indices, counts = _VF.unique_consecutive(  # type: ignore[attr-defined]
(TaskRunner pid=12658) RuntimeError: The Inner error is reported as above. The process exits for this inner error, and the current working operator name is aclnnInplaceCopy.
(TaskRunner pid=12658) Since the operator is called asynchronously, the stacktrace may be inaccurate. If you want to get the accurate stacktrace, pleace set the environment variable ASCEND_LAUNCH_BLOCKING=1.
(TaskRunner pid=12658) Note: ASCEND_LAUNCH_BLOCKING=1 will force ops to run in synchronous mode, resulting in performance degradation. Please unset ASCEND_LAUNCH_BLOCKING in time after debugging.
```
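The stack above points at vLLM's LoRA punica wrapper: a per-token LoRA index buffer is preallocated with a fixed capacity (8192 in this log), and the prefill batch delivers more token indices (8462) than fit, so the in-place copy fails on the NPU. Below is a minimal sketch of the failing pattern, with standalone tensors standing in for the real vLLM objects; the sizes come straight from the log, while the tie to `max_num_batched_tokens` is an assumption on my part:

```python
import torch

# Preallocated per-token LoRA index buffer; 8192 matches the "tensor self"
# size in the log (assumed to be derived from max_num_batched_tokens).
token_lora_indices = torch.zeros(8192, dtype=torch.long)

# The batch carries 8462 token indices, the "tensor src" size in the log.
base_indices = torch.zeros(8462, dtype=torch.long)

# Same pattern as _update_base_metadata in vllm/lora/punica_wrapper/punica_base.py:
# the slice is silently capped at the buffer's 8192 elements, so copying 8462
# elements raises a size-mismatch error (surfaced on NPU via aclnnInplaceCopy as
# "8462 and 8192 cannot broadcast").
token_lora_indices[: base_indices.shape[0]].copy_(base_indices)
```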
A similar error also occurs in a single-node environment (same software environment as above):
Shell script:

```shell
python3 -m verl.trainer.main_ppo \
    algorithm.adv_estimator=grpo \
    data.train_files=/home/ma-user/work/hjx_train/verl/train_data/gsm8k/train.parquet \
    data.val_files=/home/ma-user/work/hjx_train/verl/train_data/gsm8k/test.parquet \
    data.train_batch_size=128 \
    data.max_prompt_length=512 \
    data.max_response_length=1024 \
    data.filter_overlong_prompts=True \
    data.truncation='error' \
    data.shuffle=False \
    actor_rollout_ref.model.path=/home/ma-user/work/Qwen2.5-3B-Instruct \
    actor_rollout_ref.model.use_shm=True \
    actor_rollout_ref.model.lora_rank=64 \
    actor_rollout_ref.model.lora_alpha=32 \
    actor_rollout_ref.actor.optim.lr=3e-6 \
    actor_rollout_ref.actor.use_torch_compile=False \
    actor_rollout_ref.model.use_remove_padding=True \
    actor_rollout_ref.actor.ppo_mini_batch_size=64 \
    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=40 \
    actor_rollout_ref.actor.use_kl_loss=True \
    actor_rollout_ref.actor.kl_loss_coef=0.001 \
    actor_rollout_ref.actor.kl_loss_type=low_var_kl \
    actor_rollout_ref.actor.entropy_coeff=0 \
    actor_rollout_ref.model.enable_gradient_checkpointing=True \
    actor_rollout_ref.actor.fsdp_config.param_offload=False \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=8 \
    actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
    actor_rollout_ref.rollout.name=vllm \
    actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
    actor_rollout_ref.rollout.n=5 \
    actor_rollout_ref.rollout.load_format=safetensors \
    actor_rollout_ref.rollout.layered_summon=True \
    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=40 \
    actor_rollout_ref.ref.fsdp_config.param_offload=True \
    algorithm.use_kl_in_reward=False \
    trainer.critic_warmup=0 \
    trainer.logger=['console'] \
    trainer.project_name='verl_grpo_example_gsm8k' \
    trainer.experiment_name='qwen2.5_3b_grpo_lora' \
    trainer.n_gpus_per_node=8 \
    trainer.nnodes=1 \
    trainer.save_freq=-1 \
    trainer.test_freq=5 \
    trainer.total_epochs=2 \
    trainer.device=npu $@
```
Error message:

```text
Training Progress:  45%|████████████████████████████████████████▎ | 52/116 [7:15:40<8:32:14, 480.23s/it]
(TaskRunner pid=2310088) Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.actor_rollout_generate_sequences() (pid=2322349, ip=172.16.0.238, actor_id=06371f3131d628f787a0b90201000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0xffcfd905b580>)
(TaskRunner pid=2310088)   File "/home/ma-user/work/hjx_train/verl/verl/verl/single_controller/ray/base.py", line 663, in func
(TaskRunner pid=2310088)     return getattr(self.worker_dict[key], name)(*args, **kwargs)
(TaskRunner pid=2310088)   File "/home/ma-user/work/hjx_train/verl/verl/verl/single_controller/base/decorator.py", line 540, in inner
(TaskRunner pid=2310088)     return func(*args, **kwargs)
(TaskRunner pid=2310088)   File "/home/ma-user/work/hjx_train/verl/verl/verl/workers/fsdp_workers.py", line 662, in generate_sequences
(TaskRunner pid=2310088)     output = self.rollout.generate_sequences(prompts=prompts)
(TaskRunner pid=2310088)   File "/home/ma-user/work/hjx_train/verl/verl/verl/utils/debug/performance.py", line 78, in f
(TaskRunner pid=2310088)     return self.log(decorated_function, *args, **kwargs)
(TaskRunner pid=2310088)   File "/home/ma-user/work/hjx_train/verl/verl/verl/utils/debug/performance.py", line 88, in log
(TaskRunner pid=2310088)     output = func(*args, **kwargs)
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(TaskRunner pid=2310088)     return func(*args, **kwargs)
(TaskRunner pid=2310088)   File "/home/ma-user/work/hjx_train/verl/verl/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py", line 269, in generate_sequences
(TaskRunner pid=2310088)     outputs = self.inference_engine.generate(
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/utils.py", line 1267, in inner
(TaskRunner pid=2310088)     return fn(*args, **kwargs)
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 474, in generate
(TaskRunner pid=2310088)     outputs = self._run_engine(use_tqdm=use_tqdm)
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 1517, in _run_engine
(TaskRunner pid=2310088)     step_outputs = self.llm_engine.step()
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/engine/llm_engine.py", line 1352, in step
(TaskRunner pid=2310088)     outputs = self.model_executor.execute_model(
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/executor/executor_base.py", line 141, in execute_model
(TaskRunner pid=2310088)     output = self.collective_rpc("execute_model",
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/executor/uniproc_executor.py", line 57, in collective_rpc
(TaskRunner pid=2310088)     answer = run_method(self.driver_worker, method, args, kwargs)
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/utils.py", line 2671, in run_method
(TaskRunner pid=2310088)     return func(*args, **kwargs)
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/worker/worker_base.py", line 421, in execute_model
(TaskRunner pid=2310088)     output = self.model_runner.execute_model(
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(TaskRunner pid=2310088)     return func(*args, **kwargs)
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm_ascend/worker/model_runner.py", line 1344, in execute_model
(TaskRunner pid=2310088)     self.set_active_loras(model_input.lora_requests,
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm_ascend/worker/model_runner.py", line 1218, in set_active_loras
(TaskRunner pid=2310088)     self.lora_manager.set_active_adapters(lora_requests, lora_mapping)
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/lora/worker_manager.py", line 170, in set_active_adapters
(TaskRunner pid=2310088)     set_active_adapters_worker(requests, mapping, self._apply_adapters,
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/adapter_commons/utils.py", line 56, in set_active_adapters_worker
(TaskRunner pid=2310088)     set_adapter_mapping_func(mapping)
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/lora/models.py", line 706, in set_adapter_mapping
(TaskRunner pid=2310088)     self._last_mapping = set_adapter_mapping(mapping, self._last_mapping,
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/adapter_commons/utils.py", line 31, in set_adapter_mapping
(TaskRunner pid=2310088)     set_mapping_func(mapping)
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/lora/models.py", line 471, in _set_adapter_mapping
(TaskRunner pid=2310088)     self.punica_wrapper.update_metadata(
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/lora/punica_wrapper/punica_base.py", line 334, in update_metadata
(TaskRunner pid=2310088)     self._update_base_metadata(mapping, lora_index_to_id, max_loras,
(TaskRunner pid=2310088)   File "/home/ma-user/anaconda3/envs/PyTorch-2.1.0/lib/python3.10/site-packages/vllm/lora/punica_wrapper/punica_base.py", line 197, in _update_base_metadata
(TaskRunner pid=2310088)     self.token_lora_indices[:base_indices.shape[0]].copy_(base_indices)
(TaskRunner pid=2310088) RuntimeError: call aclnnInplaceCopy failed, detail:EL0004: [PID: 2322349] 2025-07-17-12:39:05.240.339 Failed to allocate memory.
(TaskRunner pid=2310088) Possible Cause: Available memory is insufficient.
(TaskRunner pid=2310088) Solution: Close applications not in use.
(TaskRunner pid=2310088) TraceBack (most recent call last):
(TaskRunner pid=2310088) alloc device memory failed, runtime result = 207001[FUNC:ReportCallError][FILE:log_inner.cpp][LINE:161]
(TaskRunner pid=2310088) 8264 and 8192 cannot broadcast.
(TaskRunner pid=2310088) The size of tensor self [8192] must match the size of tensor src [8264].
(TaskRunner pid=2310088)
(TaskRunner pid=2310088) [ERROR] 2025-07-17-18:36:50 (PID:2322349, Device:0, RankID:7) ERR01100 OPS call acl api failed
```
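The single-node run fails with the same buffer overflow (8264 source elements against the 8192-element buffer), here surfaced together with a device-memory allocation failure. Purely as an illustration of the mechanism, and not a proposed vLLM patch, a guard at the copy site would turn the opaque ACL error into an actionable one; the helper below is hypothetical, and the hint about `max_num_batched_tokens` is an assumption:

```python
import torch


def fill_token_lora_indices(buffer: torch.Tensor, base_indices: torch.Tensor) -> None:
    """Copy per-token LoRA indices into a preallocated buffer, failing loudly on overflow.

    Hypothetical helper modeled on _update_base_metadata in punica_base.py; not vLLM code.
    """
    if base_indices.shape[0] > buffer.shape[0]:
        raise RuntimeError(
            f"{base_indices.shape[0]} LoRA token indices exceed the preallocated "
            f"capacity of {buffer.shape[0]}; consider raising max_num_batched_tokens."
        )
    buffer[: base_indices.shape[0]].copy_(base_indices)


# With the sizes from this log, the guard trips with a clear message instead of
# the asynchronous aclnnInplaceCopy failure:
# fill_token_lora_indices(torch.zeros(8192, dtype=torch.long),
#                         torch.zeros(8264, dtype=torch.long))
```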