Skip to content

Commit 39e24e5

Browse files
iAmir97 and gemini-code-assist[bot]
authored and committed
[Chore] Separate out vllm.utils.mem_utils (vllm-project#27143)
Signed-off-by: iAmir97 <Amir.balwel@embeddedllm.com> Signed-off-by: iAmir97 <71513472+iAmir97@users.noreply.github.com> Co-authored-by: iAmir97 <Amir.balwel@embeddedllm.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Signed-off-by: Alberto Perdomo <aperdomo@redhat.com>
1 parent ac560ec commit 39e24e5

File tree

19 files changed

+268
-252
lines changed

19 files changed

+268
-252
lines changed

tests/basic_correctness/test_cumem.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from vllm import LLM, SamplingParams
88
from vllm.device_allocator.cumem import CuMemAllocator
9-
from vllm.utils import GiB_bytes
9+
from vllm.utils.mem_constants import GiB_bytes
1010

1111
from ..utils import create_new_process_for_each_test
1212

tests/kernels/attention/test_attention.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from vllm import _custom_ops as ops
1212
from vllm.attention.layer import Attention, MultiHeadAttention
1313
from vllm.platforms import current_platform
14-
from vllm.utils import get_max_shared_memory_bytes
14+
from vllm.utils.mem_utils import get_max_shared_memory_bytes
1515

1616
if not current_platform.is_rocm():
1717
from xformers import ops as xops

tests/models/test_initialization.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import pytest
88

99
from vllm import LLM
10-
from vllm.utils import GiB_bytes
10+
from vllm.utils.mem_constants import GiB_bytes
1111
from vllm.v1.core.kv_cache_utils import (
1212
generate_scheduler_kv_cache_config,
1313
get_kv_cache_configs,

tests/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,10 @@
4646
from vllm.transformers_utils.tokenizer import get_tokenizer
4747
from vllm.utils import (
4848
FlexibleArgumentParser,
49-
GB_bytes,
5049
cuda_device_count_stateless,
5150
get_open_port,
5251
)
52+
from vllm.utils.mem_constants import GB_bytes
5353

5454
if current_platform.is_rocm():
5555
from amdsmi import (

tests/utils_/test_utils.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323

2424
from vllm.utils import (
2525
FlexibleArgumentParser,
26-
MemorySnapshot,
2726
bind_kv_cache,
2827
common_broadcastable_dtype,
2928
current_stream,
@@ -33,13 +32,13 @@
3332
join_host_port,
3433
make_zmq_path,
3534
make_zmq_socket,
36-
memory_profiling,
3735
sha256,
3836
split_host_port,
3937
split_zmq_path,
4038
unique_filepath,
4139
)
4240

41+
from vllm.utils.mem_utils import MemorySnapshot, memory_profiling
4342
from ..utils import create_new_process_for_each_test, flat_product
4443

4544

tests/v1/core/test_kv_cache_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
PlaceholderRange,
1515
)
1616
from vllm.sampling_params import SamplingParams
17-
from vllm.utils import GiB_bytes, sha256, sha256_cbor
17+
from vllm.utils import sha256, sha256_cbor
18+
from vllm.utils.mem_constants import GiB_bytes
1819
from vllm.v1.core.kv_cache_manager import KVCacheManager
1920
from vllm.v1.core.kv_cache_utils import (
2021
BlockHash,

tests/v1/tpu/worker/test_tpu_model_runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
)
1414
from vllm.pooling_params import PoolingParams
1515
from vllm.sampling_params import SamplingParams
16-
from vllm.utils import GiB_bytes
16+
from vllm.utils.mem_constants import GiB_bytes
1717
from vllm.v1.core.kv_cache_utils import estimate_max_model_len, get_kv_cache_configs
1818
from vllm.v1.core.sched.output import CachedRequestData, NewRequestData, SchedulerOutput
1919
from vllm.v1.worker.tpu_model_runner import (

tests/v1/worker/test_gpu_model_runner.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
from vllm.model_executor.layers.mamba.mamba_mixer2 import MambaMixer2
2222
from vllm.platforms import current_platform
2323
from vllm.sampling_params import SamplingParams
24-
from vllm.utils import GiB_bytes, update_environment_variables
24+
from vllm.utils import update_environment_variables
25+
from vllm.utils.mem_constants import GiB_bytes
2526
from vllm.v1.core.kv_cache_utils import estimate_max_model_len, get_kv_cache_configs
2627
from vllm.v1.core.sched.output import CachedRequestData, NewRequestData, SchedulerOutput
2728
from vllm.v1.kv_cache_interface import (

tests/v1/worker/test_worker_memory_snapshot.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import torch
1212

1313
from vllm.engine.arg_utils import EngineArgs
14-
from vllm.utils import MemorySnapshot
14+
from vllm.utils.mem_utils import MemorySnapshot
1515
from vllm.v1.worker.gpu_worker import Worker, init_worker_distributed_environment
1616

1717
# Global queue to track operation order across processes

vllm/config/cache.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010

1111
from vllm.config.utils import config
1212
from vllm.logger import init_logger
13-
from vllm.utils import GiB_bytes, get_cpu_memory
13+
from vllm.utils.mem_constants import GiB_bytes
14+
from vllm.utils.mem_utils import get_cpu_memory
1415

1516
if TYPE_CHECKING:
1617
from vllm.config.parallel import ParallelConfig

0 commit comments

Comments (0)