diff --git a/src/transformers/models/gpt_oss/configuration_gpt_oss.py b/src/transformers/models/gpt_oss/configuration_gpt_oss.py
index 0a120e7ec970..c93eb5f8dc70 100644
--- a/src/transformers/models/gpt_oss/configuration_gpt_oss.py
+++ b/src/transformers/models/gpt_oss/configuration_gpt_oss.py
@@ -98,17 +98,18 @@ def __init__(
         ]
         layer_type_validation(self.layer_types)
 
+        self.attention_bias = True
+        self.max_position_embeddings = max_position_embeddings
+        self.router_aux_loss_coef = router_aux_loss_coef
+        self.output_router_logits = output_router_logits
+        self.use_cache = use_cache
+
         # Validate the correctness of rotary position embeddings parameters
         # BC: if there is a 'type' field, copy it it to 'rope_type'.
         if self.rope_scaling is not None and "type" in self.rope_scaling:
            self.rope_scaling["rope_type"] = self.rope_scaling["type"]
         rope_config_validation(self)
 
-        self.attention_bias = True
-        self.max_position_embeddings = max_position_embeddings
-        self.router_aux_loss_coef = router_aux_loss_coef
-        self.output_router_logits = output_router_logits
-        self.use_cache = use_cache
         super().__init__(
             tie_word_embeddings=tie_word_embeddings,
             **kwargs,
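
For context (not part of the patch): the reordering matters because `rope_config_validation` can read attributes off the config object beyond `rope_scaling` itself, e.g. `max_position_embeddings` for scaling schemes such as YaRN or LongRoPE, so those attributes need to be assigned before validation runs. Below is a minimal, self-contained sketch of the failure mode; `validate_rope`, `BrokenConfig`, and `FixedConfig` are hypothetical stand-ins, not the real transformers helpers:

```python
# Hypothetical stand-in for rope_config_validation: like the real validator,
# it consults config attributes beyond rope_scaling itself.
def validate_rope(config):
    if config.rope_scaling is not None:
        _ = config.max_position_embeddings  # AttributeError if assigned too late


class BrokenConfig:
    """Pre-patch ordering: validation runs before the attribute exists."""

    def __init__(self, max_position_embeddings=131072, rope_scaling=None):
        self.rope_scaling = rope_scaling
        validate_rope(self)  # reads max_position_embeddings -> AttributeError
        self.max_position_embeddings = max_position_embeddings


class FixedConfig:
    """Post-patch ordering: assignments moved above validation."""

    def __init__(self, max_position_embeddings=131072, rope_scaling=None):
        self.rope_scaling = rope_scaling
        self.max_position_embeddings = max_position_embeddings  # moved up
        validate_rope(self)  # attribute exists, validation can read it


FixedConfig(rope_scaling={"rope_type": "yarn", "factor": 2.0})  # ok
try:
    BrokenConfig(rope_scaling={"rope_type": "yarn", "factor": 2.0})
except AttributeError as e:
    print(f"broken ordering: {e}")
```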