-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Closed
Description
Exception in ModelRpcClient:
Traceback (most recent call last):
File "/home/ubuntu/sglang/python/sglang/srt/managers/router/model_rpc.py", line 175, in exposed_step
self.forward_step()
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/ubuntu/sglang/python/sglang/srt/managers/router/model_rpc.py", line 204, in forward_step
self.forward_decode_batch(self.running_batch)
File "/home/ubuntu/sglang/python/sglang/srt/managers/router/model_rpc.py", line 534, in forward_decode_batch
) = self.model_runner.forward(batch, ForwardMode.DECODE)
File "/home/ubuntu/sglang/python/sglang/srt/managers/router/model_runner.py", line 406, in forward
return self.forward_decode(batch)
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/ubuntu/sglang/python/sglang/srt/managers/router/model_runner.py", line 375, in forward_decode
return self.model.forward(
File "/home/ubuntu/sglang/python/sglang/srt/models/llava_qwen.py", line 247, in forward
return self.language_model(input_ids, positions, input_metadata)
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/sglang/python/sglang/srt/models/qwen2.py", line 269, in forward
hidden_states = self.model(input_ids, positions, input_metadata, input_embeds)
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/sglang/python/sglang/srt/models/qwen2.py", line 239, in forward
hidden_states, residual = layer(
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/sglang/python/sglang/srt/models/qwen2.py", line 191, in forward
hidden_states = self.self_attn(
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/sglang/python/sglang/srt/models/qwen2.py", line 140, in forward
attn_output = self.attn(q, k, v, input_metadata)
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/sglang/python/sglang/srt/layers/radix_attention.py", line 125, in forward
return self.decode_forward(q, k, v, input_metadata)
File "/home/ubuntu/sglang/python/sglang/srt/layers/radix_attention.py", line 79, in decode_forward_triton
token_attention_fwd(
File "/home/ubuntu/sglang/python/sglang/srt/layers/token_attention.py", line 328, in token_attention_fwd
_token_att_m_fwd(
File "/home/ubuntu/sglang/python/sglang/srt/layers/token_attention.py", line 220, in _token_att_m_fwd
_fwd_kernel_stage1[grid](
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/triton/runtime/jit.py", line 167, in <lambda>
return lambda *args, **kwargs: self.run(grid=grid, warmup=False, *args, **kwargs)
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/triton/runtime/jit.py", line 416, in run
self.cache[device][key] = compile(
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/triton/compiler/compiler.py", line 202, in compile
return CompiledKernel(so_path, metadata_group.get(metadata_filename))
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/triton/compiler/compiler.py", line 230, in __init__
self.asm = {
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/triton/compiler/compiler.py", line 231, in <dictcomp>
file.suffix[1:]: file.read_bytes() if file.suffix[1:] == driver.binary_ext else file.read_text()
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/pathlib.py", line 1134, in read_text
with self.open(mode='r', encoding=encoding, errors=errors) as f:
File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/pathlib.py", line 1119, in open
return self._accessor.open(self, mode, buffering, encoding, errors,
FileNotFoundError: [Errno 2] No such file or directory: '/home/ubuntu/.triton/cache/07c081a6457a8ebc244495f5c4a16661/_fwd_kernel_stage1.json.tmp.pid_4191531_726754'
Metadata
Assignees
Labels
No labels