Commit d54c2f1

fix bug

Signed-off-by: zzhx1 <zzh_201018@outlook.com>

1 parent 3c52ec7 commit d54c2f1

3 files changed, +4 -7 lines changed

vllm_ascend/ascend_config.py

Lines changed: 0 additions & 3 deletions
@@ -57,9 +57,6 @@ def __init__(self, vllm_config):
             assert(
                 self.torchair_graph_config.enabled == True
             ), "lmhead_tensor_parallel_size is only supported in graph mode"
-            assert(
-                vllm_config.kv_transfer_config is not None and vllm_config.kv_transfer_config.is_kv_consumer
-            ),"lmhead_tensor_parallel_size is only supported in pd scenario and can only be used in D node."
 
 
 class TorchairGraphConfig:
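
For orientation, a minimal sketch of how the remaining validation in AscendConfig.__init__ reads after this change. The guard on lmhead_tensor_parallel_size and the TorchairGraphConfig stub are assumptions inferred from the assert messages, not code shown in this hunk.

# Minimal sketch (assumption, not the actual vllm_ascend source) of the
# validation left after this commit.
class TorchairGraphConfig:
    def __init__(self, enabled: bool = False):
        self.enabled = enabled


class AscendConfig:
    def __init__(self, vllm_config):
        self.torchair_graph_config = TorchairGraphConfig(enabled=True)
        self.lmhead_tensor_parallel_size = getattr(
            vllm_config, "lmhead_tensor_parallel_size", None)
        if self.lmhead_tensor_parallel_size is not None:
            # The kv-consumer (D-node) assert that used to follow this one
            # was removed by this commit; graph mode is still required.
            assert (
                self.torchair_graph_config.enabled == True
            ), "lmhead_tensor_parallel_size is only supported in graph mode"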

vllm_ascend/ops/vocab_parallel_embedding.py

Lines changed: 2 additions & 2 deletions
@@ -41,7 +41,7 @@
     _prune_hidden_states
 )
 from vllm.model_executor.parameter import BasevLLMParameter
-from vllm.model_executor.utils import set_weight_attrs, _enable_lmhead_tp
+from vllm.model_executor.utils import set_weight_attrs
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig,
@@ -51,7 +51,7 @@
 
 from vllm_ascend.distributed.parallel_state import get_lmheadtp_group
 from vllm_ascend.ascend_config import get_ascend_config
-
+from vllm_ascend.utils import _enable_lmhead_tp
 
 def get_masked_input_and_mask(
         input_: torch.Tensor, org_vocab_start_index: int,
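
The import now resolves from vllm_ascend.utils instead of vllm.model_executor.utils. A hedged sketch of what the relocated helper might look like; the body is an assumption (the commit only shows the import), built from get_ascend_config, which this file already imports.

# Hypothetical sketch of the relocated helper in vllm_ascend/utils.py; the
# real body is not shown in this commit.
from vllm_ascend.ascend_config import get_ascend_config


def _enable_lmhead_tp() -> bool:
    # Assumed behavior: lmhead tensor parallelism is active only when the
    # ascend config sets lmhead_tensor_parallel_size (cf. ascend_config.py).
    return get_ascend_config().lmhead_tensor_parallel_size is not None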

vllm_ascend/worker/model_runner_v1.py

Lines changed: 2 additions & 2 deletions
@@ -57,7 +57,7 @@
 from vllm.sampling_params import SamplingType
 from vllm.sequence import IntermediateTensors
 from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler,
-                        LazyLoader, cdiv, _enable_lmhead_tp)
+                        LazyLoader, cdiv)
 from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
                                         KVCacheSpec)
 from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, LogprobsTensors,
@@ -87,7 +87,7 @@
 from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, ACL_FORMAT_FRACTAL_NZ,
                                ProfileExecuteDuration, is_310p,
                                maybe_converting_weight_acl_format,
-                               vllm_version_is)
+                               vllm_version_is, _enable_lmhead_tp)
 from vllm_ascend.worker.eagle_proposer_v1 import EagleProposer
 from vllm_ascend.worker.mtp_proposer_v1 import MtpProposer
 from vllm_ascend.worker.npu_input_batch import CachedRequestState, InputBatch
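
With the model runner now importing _enable_lmhead_tp from vllm_ascend.utils, a call site might gate lmhead-TP collectives as in the hypothetical sketch below; maybe_run_with_lmhead_tp and its body are illustrative assumptions, not code from model_runner_v1.py. Only the two imports are grounded in this commit.

# Hypothetical call-site sketch; the function itself is illustrative.
import torch

from vllm_ascend.distributed.parallel_state import get_lmheadtp_group
from vllm_ascend.utils import _enable_lmhead_tp


def maybe_run_with_lmhead_tp(logits: torch.Tensor) -> torch.Tensor:
    if _enable_lmhead_tp():
        # Assumed pattern: route collectives through the dedicated lmhead
        # tensor-parallel group rather than the default TP group.
        lmhead_group = get_lmheadtp_group()
        ...  # e.g. gather/scatter logits across lmhead_group
    return logits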
