
Commit b207373

Isotr0py authored and albertoperdomo2 committed
[Chore] Clean up pytorch helper functions in vllm.utils (vllm-project#26908)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Signed-off-by: isotr0py <2037008807@qq.com>
Signed-off-by: Alberto Perdomo <aperdomo@redhat.com>
1 parent f1c17de commit b207373
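
Every hunk in this commit follows the same pattern: torch-specific helpers (STR_DTYPE_TO_TORCH_DTYPE, create_kv_caches_with_random, is_torch_equal_or_newer, and similar) move from the vllm.utils top level into vllm.utils.torch_utils, while torch-agnostic helpers such as FlexibleArgumentParser stay where they were. A minimal before/after sketch of the import change, using only names taken from the diffs below:

    # Before this commit: torch helpers sat at the vllm.utils top level.
    from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser

    # After this commit: torch helpers come from vllm.utils.torch_utils;
    # non-torch helpers like FlexibleArgumentParser remain in vllm.utils.
    from vllm.utils import FlexibleArgumentParser
    from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE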

File tree

119 files changed: +772 additions, -714 deletions

Note: large commits hide some content by default; only a subset of the 119 changed files is shown below.

benchmarks/kernels/bench_per_token_quant_fp8.py

Lines changed: 2 additions & 1 deletion

@@ -10,7 +10,8 @@
 from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
 from vllm.model_executor.layers.quantization.utils.quant_utils import GroupShape
 from vllm.triton_utils import triton
-from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser
+from vllm.utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
 
 
 def with_triton_mode(fn):
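
For context, a minimal sketch of how these two imports are typically used together in the benchmark scripts; the --dtype flag and its default are illustrative assumptions, not taken from this diff:

    from vllm.utils import FlexibleArgumentParser
    from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE

    # FlexibleArgumentParser is a drop-in argparse.ArgumentParser variant.
    parser = FlexibleArgumentParser(description="toy benchmark")
    # Hypothetical flag: STR_DTYPE_TO_TORCH_DTYPE keys are dtype strings
    # such as "half", "bfloat16", "float".
    parser.add_argument("--dtype", type=str, default="half")
    args = parser.parse_args()

    # Map the dtype string onto the concrete torch.dtype.
    torch_dtype = STR_DTYPE_TO_TORCH_DTYPE[args.dtype]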

benchmarks/kernels/benchmark_activation.py

Lines changed: 2 additions & 1 deletion

@@ -10,7 +10,8 @@
 from vllm.model_executor.custom_op import CustomOp
 from vllm.platforms import current_platform
 from vllm.triton_utils import triton
-from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser
+from vllm.utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
 
 batch_size_range = [1, 16, 32, 64, 128]
 seq_len_range = [1, 16, 64, 128, 256, 512, 1024, 2048, 4096]

benchmarks/kernels/benchmark_layernorm.py

Lines changed: 2 additions & 1 deletion

@@ -7,7 +7,8 @@
 
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.platforms import current_platform
-from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser
+from vllm.utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
 
 
 @torch.inference_mode()

benchmarks/kernels/benchmark_paged_attention.py

Lines changed: 2 additions & 2 deletions

@@ -9,9 +9,9 @@
 from vllm import _custom_ops as ops
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import (
+from vllm.utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import (
     STR_DTYPE_TO_TORCH_DTYPE,
-    FlexibleArgumentParser,
     create_kv_caches_with_random,
 )
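
The paged-attention benchmark pulls both the dtype table and the KV-cache factory from the new module. A hedged usage sketch follows; the argument names and the meaning of cache_dtype="auto" are assumptions based on how create_kv_caches_with_random is commonly called in vllm, and may differ by version:

    from vllm.utils.torch_utils import (
        STR_DTYPE_TO_TORCH_DTYPE,
        create_kv_caches_with_random,
    )

    # Assumed signature: cache geometry first, then dtype handling.
    # Returns per-layer lists of randomly initialized key and value caches.
    key_caches, value_caches = create_kv_caches_with_random(
        num_blocks=128,
        block_size=16,
        num_layers=1,
        num_heads=8,
        head_size=128,
        cache_dtype="auto",  # assumed: "auto" follows model_dtype
        model_dtype=STR_DTYPE_TO_TORCH_DTYPE["half"],
    )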

benchmarks/kernels/benchmark_quant.py

Lines changed: 2 additions & 1 deletion

@@ -7,7 +7,8 @@
 
 from vllm import _custom_ops as ops
 from vllm.platforms import current_platform
-from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser
+from vllm.utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
 
 
 @torch.inference_mode()

benchmarks/kernels/benchmark_reshape_and_cache.py

Lines changed: 2 additions & 2 deletions

@@ -9,9 +9,9 @@
 from vllm import _custom_ops as ops
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import (
+from vllm.utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import (
     STR_DTYPE_TO_TORCH_DTYPE,
-    FlexibleArgumentParser,
     create_kv_caches_with_random,
 )

benchmarks/kernels/benchmark_reshape_and_cache_flash.py

Lines changed: 2 additions & 2 deletions

@@ -12,9 +12,9 @@
 )
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import (
+from vllm.utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import (
     STR_DTYPE_TO_TORCH_DTYPE,
-    FlexibleArgumentParser,
     create_kv_caches_with_random_flash,
 )

tests/compile/piecewise/test_full_cudagraph.py

Lines changed: 1 addition & 1 deletion

@@ -11,7 +11,7 @@
 from vllm import LLM, SamplingParams
 from vllm.config import CompilationConfig
 from vllm.platforms import current_platform
-from vllm.utils import is_torch_equal_or_newer
+from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 
 @contextlib.contextmanager
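
is_torch_equal_or_newer makes the same move; the compile tests use it to gate behavior on the installed PyTorch version. A minimal sketch of the call, with an illustrative version threshold:

    import pytest

    from vllm.utils.torch_utils import is_torch_equal_or_newer

    # Skip when the installed torch predates the (illustrative) 2.6 cutoff.
    @pytest.mark.skipif(
        not is_torch_equal_or_newer("2.6"),
        reason="requires PyTorch >= 2.6 (illustrative threshold)",
    )
    def test_needs_recent_torch():
        assert is_torch_equal_or_newer("2.0")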

tests/compile/piecewise/test_multiple_graphs.py

Lines changed: 1 addition & 1 deletion

@@ -20,7 +20,7 @@
     set_current_vllm_config,
 )
 from vllm.forward_context import BatchDescriptor, set_forward_context
-from vllm.utils import is_torch_equal_or_newer
+from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 # This import automatically registers `torch.ops.silly.attention`
 from .. import silly_attention  # noqa: F401

tests/compile/piecewise/test_simple.py

Lines changed: 1 addition & 1 deletion

@@ -19,7 +19,7 @@
     set_current_vllm_config,
 )
 from vllm.forward_context import BatchDescriptor, set_forward_context
-from vllm.utils import is_torch_equal_or_newer
+from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 # This import automatically registers `torch.ops.silly.attention`
 from ..silly_attention import get_global_counter, reset_global_counter

0 commit comments
