
Commit 886421d

pad_input and unpad_input use same implementation as fa2
1 parent c77cefd commit 886421d

File tree

1 file changed: +1 −1 lines changed


src/transformers/modeling_flash_attention_utils.py

Lines changed: 1 addition & 1 deletion
@@ -83,7 +83,7 @@ def _lazy_imports(implementation: Optional[str]):
     if implementation == "flash_attention_2" and is_torch_npu_available():
         from .integrations.npu_flash_attention import npu_flash_attn_func as flash_attn_func
         from .integrations.npu_flash_attention import npu_flash_attn_varlen_func as flash_attn_varlen_func
-        from .integrations.npu_flash_attention import pad_input, unpad_input
+        pad_input, unpad_input = _pad_input, _unpad_input

     elif implementation == "flash_attention_2" or (implementation is None and is_fa2 and not is_fa3):
         from flash_attn import flash_attn_func, flash_attn_varlen_func
         from flash_attn.bert_padding import pad_input, unpad_input
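
For context, a minimal sketch of what fa2-style pad/unpad helpers typically do; the names unpad_input_sketch and pad_input_sketch below are illustrative only and are not the actual _pad_input / _unpad_input helpers referenced by the diff. The idea is to pack only the non-padded tokens of a batch into one contiguous tensor (plus cumulative sequence lengths) for varlen attention, then scatter the results back into the padded layout afterwards.

    # Hedged sketch, assuming flash_attn.bert_padding-style semantics.
    import torch
    import torch.nn.functional as F

    def unpad_input_sketch(hidden_states, attention_mask):
        # hidden_states: (batch, seqlen, ...); attention_mask: (batch, seqlen) of 0/1.
        seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
        indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
        max_seqlen_in_batch = int(seqlens_in_batch.max())
        # Cumulative sequence lengths with a leading 0, as expected by varlen kernels.
        cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
        # Gather only the non-padded tokens into one packed tensor.
        unpadded = hidden_states.reshape(-1, *hidden_states.shape[2:])[indices]
        return unpadded, indices, cu_seqlens, max_seqlen_in_batch

    def pad_input_sketch(unpadded, indices, batch, seqlen):
        # Scatter packed tokens back into a zero-initialized (batch, seqlen, ...) tensor.
        output = unpadded.new_zeros(batch * seqlen, *unpadded.shape[1:])
        output[indices] = unpadded
        return output.reshape(batch, seqlen, *unpadded.shape[1:])

Per the commit message, the NPU branch now binds pad_input and unpad_input to the same fa2-style helpers used elsewhere in the file instead of importing separate NPU-specific versions.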
