
Commit ca2205e

Isotr0py authored and adobrzyn committed
[Bugfix] Fix GLM rotary_dim issue and support v1 (vllm-project#16912)
Signed-off-by: isotr0py <2037008807@qq.com>
Signed-off-by: Agata Dobrzyniewicz <adobrzyniewicz@habana.ai>
1 parent cc14657 commit ca2205e

File tree

  • vllm/model_executor/models/glm.py

1 file changed: +2 −3 lines changed


vllm/model_executor/models/glm.py

Lines changed: 2 additions & 3 deletions
@@ -3,21 +3,20 @@
 from vllm.config import VllmConfig
 from vllm.model_executor.models.llama import LlamaForCausalLM
 
-from .interfaces import SupportsV0Only
 from .utils import PPMissingLayer
 
 
-class GlmForCausalLM(LlamaForCausalLM, SupportsV0Only):
+class GlmForCausalLM(LlamaForCausalLM):
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        vllm_config.model_config.hf_config.partial_rotary_factor = 0.5
         super().__init__(vllm_config=vllm_config, prefix=prefix)
         # Hack Llama model to fit HF format GLM implementation
         # Attention difference between GLM and Llama:
         # 1. Half partial rotary_dim and no Neox style.
         # 2. There is no bias for o_proj in attention
         for layer in self.model.layers:
             if not isinstance(layer, PPMissingLayer):
-                layer.self_attn.rotary_emb.rotary_dim //= 2
                 layer.self_attn.rotary_emb.is_neox_style = False
                 layer.self_attn.o_proj.bias = None
                 layer.self_attn.o_proj.skip_bias_add = True
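
For context, the change replaces the per-layer `rotary_dim //= 2` patch with setting `partial_rotary_factor = 0.5` on the HF config before `LlamaForCausalLM.__init__` runs, so the rotary embedding is constructed with the halved dimension from the start rather than patched afterwards (which also allows dropping the `SupportsV0Only` restriction). Below is a minimal sketch of how a partial rotary factor is conventionally applied; the helper is hypothetical and not vLLM's implementation, it only assumes the common convention rotary_dim = head_dim * partial_rotary_factor.

def effective_rotary_dim(head_dim: int, partial_rotary_factor: float = 1.0) -> int:
    # Hypothetical helper: with partial_rotary_factor = 0.5 only the first
    # half of each head's dimensions receives rotary position embedding,
    # matching what the removed `rotary_dim //= 2` patch did after the fact.
    return int(head_dim * partial_rotary_factor)

# Example with an assumed head_dim of 128:
assert effective_rotary_dim(128, 0.5) == 64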
