Skip to content

FileNotFoundError for triton cache object, but happens every time #4002

@pseudotensor

Description

@pseudotensor
Exception in ModelRpcClient:
Traceback (most recent call last):
  File "/home/ubuntu/sglang/python/sglang/srt/managers/router/model_rpc.py", line 175, in exposed_step
    self.forward_step()
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home/ubuntu/sglang/python/sglang/srt/managers/router/model_rpc.py", line 204, in forward_step
    self.forward_decode_batch(self.running_batch)
  File "/home/ubuntu/sglang/python/sglang/srt/managers/router/model_rpc.py", line 534, in forward_decode_batch
    ) = self.model_runner.forward(batch, ForwardMode.DECODE)
  File "/home/ubuntu/sglang/python/sglang/srt/managers/router/model_runner.py", line 406, in forward
    return self.forward_decode(batch)
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home/ubuntu/sglang/python/sglang/srt/managers/router/model_runner.py", line 375, in forward_decode
    return self.model.forward(
  File "/home/ubuntu/sglang/python/sglang/srt/models/llava_qwen.py", line 247, in forward
    return self.language_model(input_ids, positions, input_metadata)
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ubuntu/sglang/python/sglang/srt/models/qwen2.py", line 269, in forward
    hidden_states = self.model(input_ids, positions, input_metadata, input_embeds)
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ubuntu/sglang/python/sglang/srt/models/qwen2.py", line 239, in forward
    hidden_states, residual = layer(
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ubuntu/sglang/python/sglang/srt/models/qwen2.py", line 191, in forward
    hidden_states = self.self_attn(
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ubuntu/sglang/python/sglang/srt/models/qwen2.py", line 140, in forward
    attn_output = self.attn(q, k, v, input_metadata)
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ubuntu/sglang/python/sglang/srt/layers/radix_attention.py", line 125, in forward
    return self.decode_forward(q, k, v, input_metadata)
  File "/home/ubuntu/sglang/python/sglang/srt/layers/radix_attention.py", line 79, in decode_forward_triton
    token_attention_fwd(
  File "/home/ubuntu/sglang/python/sglang/srt/layers/token_attention.py", line 328, in token_attention_fwd
    _token_att_m_fwd(
  File "/home/ubuntu/sglang/python/sglang/srt/layers/token_attention.py", line 220, in _token_att_m_fwd
    _fwd_kernel_stage1[grid](
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/triton/runtime/jit.py", line 167, in <lambda>
    return lambda *args, **kwargs: self.run(grid=grid, warmup=False, *args, **kwargs)
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/triton/runtime/jit.py", line 416, in run
    self.cache[device][key] = compile(
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/triton/compiler/compiler.py", line 202, in compile
    return CompiledKernel(so_path, metadata_group.get(metadata_filename))
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/triton/compiler/compiler.py", line 230, in __init__
    self.asm = {
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/site-packages/triton/compiler/compiler.py", line 231, in <dictcomp>
    file.suffix[1:]: file.read_bytes() if file.suffix[1:] == driver.binary_ext else file.read_text()
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/pathlib.py", line 1134, in read_text
    with self.open(mode='r', encoding=encoding, errors=errors) as f:
  File "/home/ubuntu/miniconda3/envs/sglang/lib/python3.10/pathlib.py", line 1119, in open
    return self._accessor.open(self, mode, buffering, encoding, errors,
FileNotFoundError: [Errno 2] No such file or directory: '/home/ubuntu/.triton/cache/07c081a6457a8ebc244495f5c4a16661/_fwd_kernel_stage1.json.tmp.pid_4191531_726754'

sgl-project/sglang#472

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions