@@ -46,7 +46,7 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
-from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.fused_moe import SharedFusedMoE
 from vllm.model_executor.layers.layernorm import LayerNorm, RMSNorm
 from vllm.model_executor.layers.linear import (ColumnParallelLinear,
                                                MergedColumnParallelLinear,
@@ -58,7 +58,6 @@
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
     per_token_group_quant_fp8)
 from vllm.model_executor.layers.rotary_embedding import get_rope
-from vllm.model_executor.layers.shared_fused_moe import SharedFusedMoE
 from vllm.model_executor.layers.vocab_parallel_embedding import (
     ParallelLMHead, VocabParallelEmbedding)
 from vllm.model_executor.model_loader.weight_utils import (
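The two import hunks only relocate the symbol: `SharedFusedMoE` is now exported by the `fused_moe` package itself rather than by a separate `shared_fused_moe` module. A minimal sketch of what call sites can rely on after the move, assuming `SharedFusedMoE` subclasses `FusedMoE` (which is what makes the one-for-one swap in the later hunks possible):

```python
# Both names are exported by the same package after this change. The subclass
# relationship asserted below is an assumption of this sketch; it is what lets
# existing isinstance checks against FusedMoE keep matching SharedFusedMoE.
from vllm.model_executor.layers.fused_moe import FusedMoE, SharedFusedMoE

assert issubclass(SharedFusedMoE, FusedMoE)
```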
@@ -1206,7 +1205,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
                                config.first_k_dense_replace)
         self.num_expert_groups = config.n_group

-        self.moe_layers: list[FusedMoE] = []
+        self.moe_layers: list[SharedFusedMoE] = []
         example_moe = None
         for layer in self.model.layers:
             if isinstance(layer, PPMissingLayer):
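For context, a hedged sketch of the bookkeeping this retyped list feeds: the loop walks the decoder layers, skips placeholders owned by other pipeline ranks, and collects the routed-expert modules. The `collect_moe_layers` helper and the `layer.mlp.experts` attribute path are illustrative names, not taken from this diff.

```python
from vllm.model_executor.layers.fused_moe import SharedFusedMoE
from vllm.model_executor.models.utils import PPMissingLayer


def collect_moe_layers(model) -> list[SharedFusedMoE]:
    """Gather the SharedFusedMoE modules that live on this pipeline rank."""
    moe_layers: list[SharedFusedMoE] = []
    for layer in model.layers:
        if isinstance(layer, PPMissingLayer):
            # Placeholder for a layer owned by another pipeline-parallel rank.
            continue
        experts = getattr(layer.mlp, "experts", None)  # illustrative attribute path
        if isinstance(experts, SharedFusedMoE):
            moe_layers.append(experts)
    return moe_layers
```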
@@ -1295,7 +1294,7 @@ def load_weights(self, weights: Iterable[tuple[str,

         # Params for weights, fp8 weight scales, fp8 activation scales
         # (param_name, weight_name, expert_id, shard_id)
-        expert_params_mapping = FusedMoE.make_expert_params_mapping(
+        expert_params_mapping = SharedFusedMoE.make_expert_params_mapping(
             ckpt_gate_proj_name="gate_proj",
             ckpt_down_proj_name="down_proj",
             ckpt_up_proj_name="up_proj",
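For reference, a hedged sketch of how the remapped classmethod is typically driven during weight loading. `make_expert_params_mapping` is presumably inherited by `SharedFusedMoE` from `FusedMoE`, so the call is a drop-in rename; the `num_experts` value below is illustrative rather than taken from this diff.

```python
from vllm.model_executor.layers.fused_moe import SharedFusedMoE

# Each entry maps a per-expert checkpoint weight onto the fused parameter it
# loads into: (param_name, weight_name, expert_id, shard_id).
expert_params_mapping = SharedFusedMoE.make_expert_params_mapping(
    ckpt_gate_proj_name="gate_proj",
    ckpt_down_proj_name="down_proj",
    ckpt_up_proj_name="up_proj",
    num_experts=64,  # illustrative; the real call reads this from the model config
)

for param_name, weight_name, expert_id, shard_id in expert_params_mapping:
    # load_weights matches each incoming checkpoint name against weight_name and
    # dispatches to the target parameter's weight_loader with expert_id/shard_id.
    ...
```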