Skip to content

Commit a58480b

Browse files
delockdeepcharm
authored and committed
Add qwen3 meta loading for AutoTP (deepspeedai#7293)
This PR fixes deepspeedai#7275 to enable Qwen3 meta loading for AutoTP. Signed-off-by: Ma, Guokai <guokai.ma@intel.com>. Signed-off-by: Max Kovalenko <mkovalenko@habana.ai>.
1 parent a25ac3f commit a58480b

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

deepspeed/module_inject/auto_tp.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,8 @@ def is_load_module(module):
137137
"LPLayerNorm", "SharedEmbedding", "OPTLearnedPositionalEmbedding", "LlamaRMSNorm", "FalconLinear",
138138
"MistralRMSNorm", "T5LayerNorm", "MixtralRMSNorm", "Phi3RotaryEmbedding", "Phi3SuScaledRotaryEmbedding",
139139
"Phi3RMSNorm", "YuanRMSNorm", "YuanRotaryEmbedding", "Phi3LongRoPEScaledRotaryEmbedding", "Qwen2RMSNorm",
140-
"DeepseekV2RMSNorm", "DeepseekV3RMSNorm", "DeepseekV2YarnRotaryEmbedding", "DeepseekV3YarnRotaryEmbedding",
141-
"MoEGate"
140+
"Qwen3RMSNorm", "DeepseekV2RMSNorm", "DeepseekV3RMSNorm", "DeepseekV2YarnRotaryEmbedding",
141+
"DeepseekV3YarnRotaryEmbedding", "MoEGate"
142142
]
143143
return module.__class__ in load_layers or module._get_name() in load_layer_names
144144

0 commit comments

Comments
 (0)