
Commit b207373

Isotr0py authored and albertoperdomo2 committed
[Chore] Clean up pytorch helper functions in vllm.utils (vllm-project#26908)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Signed-off-by: isotr0py <2037008807@qq.com>
Signed-off-by: Alberto Perdomo <aperdomo@redhat.com>
1 parent f1c17de commit b207373
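
Every hunk in this commit follows the same pattern: torch-specific helpers (STR_DTYPE_TO_TORCH_DTYPE, create_kv_caches_with_random, is_torch_equal_or_newer, and similar) move from the vllm.utils top level into vllm.utils.torch_utils, while torch-agnostic helpers such as FlexibleArgumentParser stay where they were. A minimal before/after sketch of the import change, using only names taken from the diffs below:

    # Before this commit: torch helpers sat at the vllm.utils top level.
    from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser

    # After this commit: torch helpers come from vllm.utils.torch_utils;
    # non-torch helpers like FlexibleArgumentParser remain in vllm.utils.
    from vllm.utils import FlexibleArgumentParser
    from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE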

File tree

119 files changed: +772 additions, -714 deletions

Note: large commits hide some content by default; only a subset of the 119 changed files is shown below.

benchmarks/kernels/bench_per_token_quant_fp8.py

Lines changed: 2 additions & 1 deletion

@@ -10,7 +10,8 @@
 from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
 from vllm.model_executor.layers.quantization.utils.quant_utils import GroupShape
 from vllm.triton_utils import triton
-from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser
+from vllm.utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
 
 
 def with_triton_mode(fn):
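
For context, a minimal sketch of how these two imports are typically used together in the benchmark scripts; the --dtype flag and its default are illustrative assumptions, not taken from this diff:

    from vllm.utils import FlexibleArgumentParser
    from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE

    # FlexibleArgumentParser is a drop-in argparse.ArgumentParser variant.
    parser = FlexibleArgumentParser(description="toy benchmark")
    # Hypothetical flag: STR_DTYPE_TO_TORCH_DTYPE keys are dtype strings
    # such as "half", "bfloat16", "float".
    parser.add_argument("--dtype", type=str, default="half")
    args = parser.parse_args()

    # Map the dtype string onto the concrete torch.dtype.
    torch_dtype = STR_DTYPE_TO_TORCH_DTYPE[args.dtype]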

benchmarks/kernels/benchmark_activation.py

Lines changed: 2 additions & 1 deletion

@@ -10,7 +10,8 @@
 from vllm.model_executor.custom_op import CustomOp
 from vllm.platforms import current_platform
 from vllm.triton_utils import triton
-from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser
+from vllm.utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
 
 batch_size_range = [1, 16, 32, 64, 128]
 seq_len_range = [1, 16, 64, 128, 256, 512, 1024, 2048, 4096]

benchmarks/kernels/benchmark_layernorm.py

Lines changed: 2 additions & 1 deletion

@@ -7,7 +7,8 @@
 
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.platforms import current_platform
-from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser
+from vllm.utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
 
 
 @torch.inference_mode()

benchmarks/kernels/benchmark_paged_attention.py

Lines changed: 2 additions & 2 deletions

@@ -9,9 +9,9 @@
 from vllm import _custom_ops as ops
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import (
+from vllm.utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import (
     STR_DTYPE_TO_TORCH_DTYPE,
-    FlexibleArgumentParser,
     create_kv_caches_with_random,
 )
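
The paged-attention benchmark pulls both the dtype table and the KV-cache factory from the new module. A hedged usage sketch follows; the argument names and the meaning of cache_dtype="auto" are assumptions based on how create_kv_caches_with_random is commonly called in vllm, and may differ by version:

    from vllm.utils.torch_utils import (
        STR_DTYPE_TO_TORCH_DTYPE,
        create_kv_caches_with_random,
    )

    # Assumed signature: cache geometry first, then dtype handling.
    # Returns per-layer lists of randomly initialized key and value caches.
    key_caches, value_caches = create_kv_caches_with_random(
        num_blocks=128,
        block_size=16,
        num_layers=1,
        num_heads=8,
        head_size=128,
        cache_dtype="auto",  # assumed: "auto" follows model_dtype
        model_dtype=STR_DTYPE_TO_TORCH_DTYPE["half"],
    )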

benchmarks/kernels/benchmark_quant.py

Lines changed: 2 additions & 1 deletion

@@ -7,7 +7,8 @@
 
 from vllm import _custom_ops as ops
 from vllm.platforms import current_platform
-from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser
+from vllm.utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
 
 
 @torch.inference_mode()

benchmarks/kernels/benchmark_reshape_and_cache.py

Lines changed: 2 additions & 2 deletions

@@ -9,9 +9,9 @@
 from vllm import _custom_ops as ops
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import (
+from vllm.utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import (
     STR_DTYPE_TO_TORCH_DTYPE,
-    FlexibleArgumentParser,
     create_kv_caches_with_random,
 )

benchmarks/kernels/benchmark_reshape_and_cache_flash.py

Lines changed: 2 additions & 2 deletions

@@ -12,9 +12,9 @@
 )
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import (
+from vllm.utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import (
     STR_DTYPE_TO_TORCH_DTYPE,
-    FlexibleArgumentParser,
     create_kv_caches_with_random_flash,
 )

tests/compile/piecewise/test_full_cudagraph.py

Lines changed: 1 addition & 1 deletion

@@ -11,7 +11,7 @@
 from vllm import LLM, SamplingParams
 from vllm.config import CompilationConfig
 from vllm.platforms import current_platform
-from vllm.utils import is_torch_equal_or_newer
+from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 
 @contextlib.contextmanager
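
is_torch_equal_or_newer makes the same move; the compile tests use it to gate behavior on the installed PyTorch version. A minimal sketch of the call, with an illustrative version threshold:

    import pytest

    from vllm.utils.torch_utils import is_torch_equal_or_newer

    # Skip when the installed torch predates the (illustrative) 2.6 cutoff.
    @pytest.mark.skipif(
        not is_torch_equal_or_newer("2.6"),
        reason="requires PyTorch >= 2.6 (illustrative threshold)",
    )
    def test_needs_recent_torch():
        assert is_torch_equal_or_newer("2.0")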

tests/compile/piecewise/test_multiple_graphs.py

Lines changed: 1 addition & 1 deletion

@@ -20,7 +20,7 @@
     set_current_vllm_config,
 )
 from vllm.forward_context import BatchDescriptor, set_forward_context
-from vllm.utils import is_torch_equal_or_newer
+from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 # This import automatically registers `torch.ops.silly.attention`
 from .. import silly_attention  # noqa: F401

tests/compile/piecewise/test_simple.py

Lines changed: 1 addition & 1 deletion

@@ -19,7 +19,7 @@
     set_current_vllm_config,
 )
 from vllm.forward_context import BatchDescriptor, set_forward_context
-from vllm.utils import is_torch_equal_or_newer
+from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 # This import automatically registers `torch.ops.silly.attention`
 from ..silly_attention import get_global_counter, reset_global_counter

0 commit comments
