
Commit 03f4376

remove redundant apply_rotary_emb usage
1 parent eee0f00 · commit 03f4376


2 files changed: +0 additions, -23 deletions


src/transformers/integrations/npu_flash_attention.py

Lines changed: 0 additions & 15 deletions
@@ -252,18 +252,3 @@ def npu_flash_attn_varlen_func(
 
     return output
 
-
-def npu_apply_rotary_emb(x, cos, sin, **kwargs):
-    # cos tensor after chunk should be repeated through chunked dimension to original shape on Ascend NPU
-    if len(cos.shape) == 2 and cos.shape[-1] == x.shape[-1] // 2:
-        cos = cos.repeat(1, 2)
-        # cos tensor with [S,D] shape should be unsqueezed to 4-d tensor with shape [1,S,1,D]
-        cos = cos.unsqueeze(0).unsqueeze(2)
-
-    # sin tensor after chunk should be repeated through chunked dimension to original shape on Ascend NPU
-    if len(sin.shape) == 2 and sin.shape[-1] == x.shape[-1] // 2:
-        sin = sin.repeat(1, 2)
-        # sin tensor with [S,D] shape should be unsqueezed to 4-d tensor with shape [1,S,1,D]
-        sin = sin.unsqueeze(0).unsqueeze(2)
-
-    return npu_rotary_mul(x, cos, sin)
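
For reference, the helper deleted above existed to massage rotary cos/sin tables into the layout torch_npu.npu_rotary_mul expects. Below is a minimal plain-PyTorch sketch of that shape handling; the rotate-half formula standing in for the fused NPU kernel and the [B, S, H, D] input layout are assumptions for illustration, and apply_rotary_emb_sketch/rotate_half are hypothetical names, not transformers APIs.

# Minimal sketch of the deleted helper's shape handling (assumption: plain
# PyTorch stand-in for torch_npu.npu_rotary_mul, inputs laid out as [B, S, H, D]).
import torch


def rotate_half(t: torch.Tensor) -> torch.Tensor:
    # Standard rotary rotate-half: split the head dim and map (x1, x2) to (-x2, x1).
    x1, x2 = t.chunk(2, dim=-1)
    return torch.cat((-x2, x1), dim=-1)


def apply_rotary_emb_sketch(x, cos, sin):
    # A 2-d [S, D/2] half-table is repeated back to [S, D], then unsqueezed to
    # [1, S, 1, D] so it broadcasts against x of shape [B, S, H, D], the same
    # normalization the removed npu_apply_rotary_emb performed.
    if cos.dim() == 2 and cos.shape[-1] == x.shape[-1] // 2:
        cos = cos.repeat(1, 2).unsqueeze(0).unsqueeze(2)
    if sin.dim() == 2 and sin.shape[-1] == x.shape[-1] // 2:
        sin = sin.repeat(1, 2).unsqueeze(0).unsqueeze(2)
    # npu_rotary_mul fuses this elementwise combination on Ascend hardware.
    return x * cos + rotate_half(x) * sin


# Shape check: [batch=2, seq=5, heads=4, head_dim=8] with [5, 4] half-tables.
q = torch.randn(2, 5, 4, 8)
cos, sin = torch.randn(5, 4), torch.randn(5, 4)
assert apply_rotary_emb_sketch(q, cos, sin).shape == q.shape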

src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py

Lines changed: 0 additions & 8 deletions
@@ -44,7 +44,6 @@
 from ...cache_utils import Cache
 from ...configuration_utils import PretrainedConfig, layer_type_validation
 from ...generation import GenerationMixin
-from ...modeling_flash_attention_utils import is_flash_attn_available
 from ...modeling_outputs import BaseModelOutput, ModelOutput
 from ...modeling_rope_utils import rope_config_validation
 from ...modeling_utils import ALL_ATTENTION_FUNCTIONS
@@ -58,13 +57,6 @@
 from ...utils.hub import cached_file
 
 
-if is_flash_attn_available():
-    from ...modeling_flash_attention_utils import apply_rotary_emb, flash_attn_varlen_func
-else:
-    flash_attn_varlen_func = None
-    apply_rotary_emb = None
-
-
 logger = logging.get_logger(__name__)
 
 
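
The block deleted by the second hunk is a standard optional-dependency guard. A self-contained sketch of the idiom follows, assuming a package probe captures the essence of the availability check (the real transformers helper also validates versions, so treat this as an approximation):

# Sketch of the guarded optional import this hunk deletes (assumption: probing
# for the flash_attn package stands in for transformers' full availability check).
import importlib.util


def is_flash_attn_available() -> bool:
    # True if the flash_attn package can be found, without importing it.
    return importlib.util.find_spec("flash_attn") is not None


if is_flash_attn_available():
    from flash_attn import flash_attn_varlen_func
else:
    # Callers must check for None before dispatching to the fast path.
    flash_attn_varlen_func = None

Since the modular file no longer calls apply_rotary_emb after this commit, both the guard and its None fallbacks were dead code, which is why the hunk removes the whole block rather than just the unused name.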
