|
40 | 40 | import vllm.envs as envs |
41 | 41 | from vllm.distributed.device_communicators.base_device_communicator import ( |
42 | 42 | DeviceCommunicatorBase) |
43 | | -from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBase |
44 | | -from vllm.distributed.kv_transfer.kv_connector.v1 import KVConnectorBase_V1 |
45 | 43 | from vllm.distributed.utils import StatelessProcessGroup |
46 | 44 | from vllm.logger import init_logger |
47 | 45 | from vllm.utils import (direct_register_custom_op, resolve_obj_by_qualname, |
48 | 46 | supports_custom_op) |
49 | 47 |
|
50 | 48 | if TYPE_CHECKING: |
51 | | - from vllm.config import VllmConfig |
| 49 | + pass |
52 | 50 |
|
53 | 51 |
|
54 | 52 | @dataclass |
@@ -771,44 +769,6 @@ def get_pp_group() -> GroupCoordinator: |
771 | 769 | # kept for backward compatibility |
772 | 770 | get_pipeline_model_parallel_group = get_pp_group |
773 | 771 |
|
774 | | -# TODO: once we deprecate V0 KV transfer, we can move this to |
775 | | -# be a non-global object. |
776 | | -_KV_CONNECTOR_AGENT: Union[KVConnectorBase, KVConnectorBase_V1, None] = None |
777 | | - |
778 | | - |
779 | | -def get_kv_transfer_group() -> Union[KVConnectorBase, KVConnectorBase_V1]: |
780 | | - assert _KV_CONNECTOR_AGENT is not None, ( |
781 | | - "disaggregated KV cache transfer parallel group is not initialized") |
782 | | - return _KV_CONNECTOR_AGENT |
783 | | - |
784 | | - |
785 | | -def has_kv_transfer_group() -> bool: |
786 | | - return _KV_CONNECTOR_AGENT is not None |
787 | | - |
788 | | - |
789 | | -def is_v1_kv_transfer_group( |
790 | | - connector: Union[KVConnectorBase_V1, KVConnectorBase, |
791 | | - None] = None) -> bool: |
792 | | - """Check if the KV connector is the v1 connector. |
793 | | - If the argument is None, it will check the global KV connector |
794 | | -
|
795 | | - Args: |
796 | | - connector: The KV connector to check. If None, it will check the |
797 | | - global KV connector. |
798 | | -
|
799 | | - Note: |
800 | | - This function will no-longer be needed after the v1 KV connector |
801 | | - becomes the default. |
802 | | - """ |
803 | | - if connector is None: |
804 | | - connector = _KV_CONNECTOR_AGENT |
805 | | - |
806 | | - if connector is None: |
807 | | - # Global KV connector is not set |
808 | | - return False |
809 | | - |
810 | | - return isinstance(connector, KVConnectorBase_V1) |
811 | | - |
812 | 772 |
|
813 | 773 | @contextmanager |
814 | 774 | def graph_capture(device: torch.device): |
@@ -991,37 +951,6 @@ def initialize_model_parallel( |
991 | 951 | _DP.rank_in_group, _PP.rank_in_group, _TP.rank_in_group) |
992 | 952 |
|
993 | 953 |
|
994 | | -def ensure_kv_transfer_initialized(vllm_config: "VllmConfig") -> None: |
995 | | - """ |
996 | | - Initialize KV cache transfer parallel group. |
997 | | - """ |
998 | | - |
999 | | - global _KV_CONNECTOR_AGENT |
1000 | | - |
1001 | | - if vllm_config.kv_transfer_config is None: |
1002 | | - return |
1003 | | - |
1004 | | - if all([ |
1005 | | - vllm_config.kv_transfer_config.is_kv_transfer_instance, |
1006 | | - _KV_CONNECTOR_AGENT is None |
1007 | | - ]): |
1008 | | - from vllm.distributed.kv_transfer.kv_connector.factory import ( |
1009 | | - KVConnectorFactory) |
1010 | | - from vllm.distributed.kv_transfer.kv_connector.v1 import ( |
1011 | | - KVConnectorRole as KVConnectorRole_V1) |
1012 | | - |
1013 | | - kwargs = { |
1014 | | - "rank": get_world_group().rank, |
1015 | | - "local_rank": get_world_group().local_rank, |
1016 | | - "config": vllm_config, |
1017 | | - # NOTE(Kuntai): |
1018 | | - # Parallel state is initialized in v1 worker, |
1019 | | - # so this connector is for sure worker connector. |
1020 | | - "role": KVConnectorRole_V1.WORKER, |
1021 | | - } |
1022 | | - _KV_CONNECTOR_AGENT = KVConnectorFactory.create_connector(**kwargs) |
1023 | | - |
1024 | | - |
1025 | 954 | def ensure_model_parallel_initialized( |
1026 | 955 | tensor_model_parallel_size: int, |
1027 | 956 | pipeline_model_parallel_size: int, |
|
0 commit comments