From cd13ce2cf3d1f944bd02f07cc8abf11498832d2f Mon Sep 17 00:00:00 2001 From: MekkCyber Date: Tue, 9 Sep 2025 11:59:58 +0000 Subject: [PATCH 01/12] add kernel --- src/transformers/models/mamba/modeling_mamba.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/transformers/models/mamba/modeling_mamba.py b/src/transformers/models/mamba/modeling_mamba.py index 3f39c2d8490b..9d8ad75c2c1b 100644 --- a/src/transformers/models/mamba/modeling_mamba.py +++ b/src/transformers/models/mamba/modeling_mamba.py @@ -33,7 +33,7 @@ auto_docstring, logging, ) -from ...utils.import_utils import is_causal_conv1d_available, is_mamba_ssm_available, is_mambapy_available +from ...utils.import_utils import is_kernels_available, is_mamba_ssm_available, is_mambapy_available from .configuration_mamba import MambaConfig @@ -50,8 +50,10 @@ else: selective_state_update, selective_scan_fn, mamba_inner_fn = None, None, None -if is_causal_conv1d_available(): - from causal_conv1d import causal_conv1d_fn, causal_conv1d_update +if is_kernels_available(): + from kernels import get_kernel + kernel_causal_conv1d = get_kernel("kernels-community/causal-conv1d") + causal_conv1d_update, causal_conv1d_fn = kernel_causal_conv1d.causal_conv1d_update, kernel_causal_conv1d.causal_conv1d_fn else: causal_conv1d_update, causal_conv1d_fn = None, None @@ -217,8 +219,8 @@ def warn_slow_implementation(self): if is_mambapy_available(): logger.warning_once( "The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)`" - " is None. Falling back to the mamba.py backend. To install follow https://github.com/state-spaces/mamba/#installation and" - " https://github.com/Dao-AILab/causal-conv1d" + " is None. Falling back to the mamba.py backend. To install follow https://github.com/state-spaces/mamba/#installation for mamaba-ssm and" + " install the kernels library using `pip install kernels`" ) else: raise ImportError( @@ -227,8 +229,8 @@ def warn_slow_implementation(self): else: logger.warning_once( "The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)`" - " is None. Falling back to the sequential implementation of Mamba, as use_mambapy is set to False. To install follow https://github.com/state-spaces/mamba/#installation and" - " https://github.com/Dao-AILab/causal-conv1d. For the mamba.py backend, follow https://github.com/alxndrTL/mamba.py." + " is None. Falling back to the sequential implementation of Mamba, as use_mambapy is set to False. To install follow https://github.com/state-spaces/mamba/#installation for mamaba-ssm and" + " install the kernels library using `pip install kernels`. For the mamba.py backend, follow https://github.com/alxndrTL/mamba.py." 
) def cuda_kernels_forward( From a333ef3eb794711b13991129418b147e18e09a86 Mon Sep 17 00:00:00 2001 From: MekkCyber Date: Tue, 9 Sep 2025 12:00:53 +0000 Subject: [PATCH 02/12] make style --- src/transformers/models/mamba/modeling_mamba.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/transformers/models/mamba/modeling_mamba.py b/src/transformers/models/mamba/modeling_mamba.py index 9d8ad75c2c1b..64f3d3869715 100644 --- a/src/transformers/models/mamba/modeling_mamba.py +++ b/src/transformers/models/mamba/modeling_mamba.py @@ -52,8 +52,12 @@ if is_kernels_available(): from kernels import get_kernel + kernel_causal_conv1d = get_kernel("kernels-community/causal-conv1d") - causal_conv1d_update, causal_conv1d_fn = kernel_causal_conv1d.causal_conv1d_update, kernel_causal_conv1d.causal_conv1d_fn + causal_conv1d_update, causal_conv1d_fn = ( + kernel_causal_conv1d.causal_conv1d_update, + kernel_causal_conv1d.causal_conv1d_fn, + ) else: causal_conv1d_update, causal_conv1d_fn = None, None From 0abdbe606d09bef3cc0ad5250b55dccba80df1eb Mon Sep 17 00:00:00 2001 From: MekkCyber Date: Tue, 9 Sep 2025 13:30:55 +0000 Subject: [PATCH 03/12] keep causal-conv1d --- src/transformers/models/mamba/modeling_mamba.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/transformers/models/mamba/modeling_mamba.py b/src/transformers/models/mamba/modeling_mamba.py index 64f3d3869715..d01dad7ca3f2 100644 --- a/src/transformers/models/mamba/modeling_mamba.py +++ b/src/transformers/models/mamba/modeling_mamba.py @@ -33,7 +33,12 @@ auto_docstring, logging, ) -from ...utils.import_utils import is_kernels_available, is_mamba_ssm_available, is_mambapy_available +from ...utils.import_utils import ( + is_causal_conv1d_available, + is_kernels_available, + is_mamba_ssm_available, + is_mambapy_available, +) from .configuration_mamba import MambaConfig @@ -58,6 +63,8 @@ kernel_causal_conv1d.causal_conv1d_update, kernel_causal_conv1d.causal_conv1d_fn, ) +elif is_causal_conv1d_available(): + from causal_conv1d import causal_conv1d_fn, causal_conv1d_update else: causal_conv1d_update, causal_conv1d_fn = None, None From 5c487d9cfc81547131fd1a67901b030951094575 Mon Sep 17 00:00:00 2001 From: MekkCyber Date: Wed, 10 Sep 2025 08:47:50 +0000 Subject: [PATCH 04/12] small fix --- src/transformers/models/mamba/modeling_mamba.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/mamba/modeling_mamba.py b/src/transformers/models/mamba/modeling_mamba.py index d01dad7ca3f2..41d3a432cf0f 100644 --- a/src/transformers/models/mamba/modeling_mamba.py +++ b/src/transformers/models/mamba/modeling_mamba.py @@ -230,7 +230,7 @@ def warn_slow_implementation(self): if is_mambapy_available(): logger.warning_once( "The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)`" - " is None. Falling back to the mamba.py backend. To install follow https://github.com/state-spaces/mamba/#installation for mamaba-ssm and" + " is None. Falling back to the mamba.py backend. To install follow https://github.com/state-spaces/mamba/#installation for mamba-ssm and" " install the kernels library using `pip install kernels`" ) else: @@ -240,7 +240,7 @@ def warn_slow_implementation(self): else: logger.warning_once( "The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)`" - " is None. 
Falling back to the sequential implementation of Mamba, as use_mambapy is set to False. To install follow https://github.com/state-spaces/mamba/#installation for mamaba-ssm and" + " is None. Falling back to the sequential implementation of Mamba, as use_mambapy is set to False. To install follow https://github.com/state-spaces/mamba/#installation for mamba-ssm and" " install the kernels library using `pip install kernels`. For the mamba.py backend, follow https://github.com/alxndrTL/mamba.py." ) From 11fcf1e69baf16b1c7b7f2565a819b6e10db5268 Mon Sep 17 00:00:00 2001 From: MekkCyber Date: Thu, 11 Sep 2025 08:29:17 +0000 Subject: [PATCH 05/12] small fix --- src/transformers/models/mamba/modeling_mamba.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/mamba/modeling_mamba.py b/src/transformers/models/mamba/modeling_mamba.py index 41d3a432cf0f..89eb6b704353 100644 --- a/src/transformers/models/mamba/modeling_mamba.py +++ b/src/transformers/models/mamba/modeling_mamba.py @@ -231,7 +231,7 @@ def warn_slow_implementation(self): logger.warning_once( "The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)`" " is None. Falling back to the mamba.py backend. To install follow https://github.com/state-spaces/mamba/#installation for mamba-ssm and" - " install the kernels library using `pip install kernels`" + " install the kernels library using `pip install kernels` or https://github.com/Dao-AILab/causal-conv1d for causal-conv1d" ) else: raise ImportError( @@ -241,7 +241,7 @@ def warn_slow_implementation(self): logger.warning_once( "The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)`" " is None. Falling back to the sequential implementation of Mamba, as use_mambapy is set to False. To install follow https://github.com/state-spaces/mamba/#installation for mamba-ssm and" - " install the kernels library using `pip install kernels`. For the mamba.py backend, follow https://github.com/alxndrTL/mamba.py." + " install the kernels library using `pip install kernels` or https://github.com/Dao-AILab/causal-conv1d for causal-conv1d. For the mamba.py backend, follow https://github.com/alxndrTL/mamba.py." ) def cuda_kernels_forward( From 3556fc0e0d78c6be23d2035903636ccdfbb600c5 Mon Sep 17 00:00:00 2001 From: MekkCyber Date: Thu, 11 Sep 2025 08:51:11 +0000 Subject: [PATCH 06/12] fix modular converter --- .../configuration_falcon_mamba.py | 16 +++++++++- .../falcon_mamba/modeling_falcon_mamba.py | 13 ++++++++ .../falcon_mamba/modular_falcon_mamba.py | 17 ++++++++-- utils/modular_model_converter.py | 32 +++++++++++++------ 4 files changed, 65 insertions(+), 13 deletions(-) diff --git a/src/transformers/models/falcon_mamba/configuration_falcon_mamba.py b/src/transformers/models/falcon_mamba/configuration_falcon_mamba.py index 7630ebd6343a..65f0462383c6 100644 --- a/src/transformers/models/falcon_mamba/configuration_falcon_mamba.py +++ b/src/transformers/models/falcon_mamba/configuration_falcon_mamba.py @@ -18,10 +18,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- import math from ...configuration_utils import PretrainedConfig +from ...utils.import_utils import is_causal_conv1d_available, is_kernels_available + + +if is_kernels_available(): + from kernels import get_kernel + + kernel_causal_conv1d = get_kernel("kernels-community/causal-conv1d") + causal_conv1d_update, causal_conv1d_fn = ( + kernel_causal_conv1d.causal_conv1d_update, + kernel_causal_conv1d.causal_conv1d_fn, + ) +elif is_causal_conv1d_available(): + from causal_conv1d import causal_conv1d_fn, causal_conv1d_update +else: + causal_conv1d_update, causal_conv1d_fn = None, None class FalconMambaConfig(PretrainedConfig): diff --git a/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py b/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py index 7b61d2bdefd9..6b02b473b821 100644 --- a/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py +++ b/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py @@ -35,6 +35,7 @@ from ...utils import ModelOutput, auto_docstring, logging from ...utils.import_utils import ( is_causal_conv1d_available, + is_kernels_available, is_mamba_ssm_available, is_mambapy_available, ) @@ -54,6 +55,18 @@ else: selective_state_update, selective_scan_fn, mamba_inner_fn = None, None, None +if is_kernels_available(): + from kernels import get_kernel + + kernel_causal_conv1d = get_kernel("kernels-community/causal-conv1d") + causal_conv1d_update, causal_conv1d_fn = ( + kernel_causal_conv1d.causal_conv1d_update, + kernel_causal_conv1d.causal_conv1d_fn, + ) +elif is_causal_conv1d_available(): + from causal_conv1d import causal_conv1d_fn, causal_conv1d_update +else: + causal_conv1d_update, causal_conv1d_fn = None, None if is_causal_conv1d_available(): from causal_conv1d import causal_conv1d_fn, causal_conv1d_update else: diff --git a/src/transformers/models/falcon_mamba/modular_falcon_mamba.py b/src/transformers/models/falcon_mamba/modular_falcon_mamba.py index 090a147d31e2..79116f0e22d7 100644 --- a/src/transformers/models/falcon_mamba/modular_falcon_mamba.py +++ b/src/transformers/models/falcon_mamba/modular_falcon_mamba.py @@ -21,7 +21,12 @@ from torch import nn from ...utils import auto_docstring, logging -from ...utils.import_utils import is_causal_conv1d_available, is_mamba_ssm_available, is_mambapy_available +from ...utils.import_utils import ( + is_causal_conv1d_available, + is_kernels_available, + is_mamba_ssm_available, + is_mambapy_available, +) from ..mamba.configuration_mamba import MambaConfig from ..mamba.modeling_mamba import ( MambaBlock, @@ -51,7 +56,15 @@ else: selective_state_update, selective_scan_fn, mamba_inner_fn = None, None, None -if is_causal_conv1d_available(): +if is_kernels_available(): + from kernels import get_kernel + + kernel_causal_conv1d = get_kernel("kernels-community/causal-conv1d") + causal_conv1d_update, causal_conv1d_fn = ( + kernel_causal_conv1d.causal_conv1d_update, + kernel_causal_conv1d.causal_conv1d_fn, + ) +elif is_causal_conv1d_available(): from causal_conv1d import causal_conv1d_fn, causal_conv1d_update else: causal_conv1d_update, causal_conv1d_fn = None, None diff --git a/utils/modular_model_converter.py b/utils/modular_model_converter.py index 54520b6aed0d..99ed8c72ba3a 100644 --- a/utils/modular_model_converter.py +++ b/utils/modular_model_converter.py @@ -1101,16 +1101,28 @@ def append_new_import_node( ): """Insert the new `node` to the list of `imports_to_keep` in-place, if it is not part of the `unused_imports` or `added_names`. 
Also modifies `added_names` in-place accordingly.""" - import_node = node.body[0] - names_to_keep = [] - for name in import_node.names: - name_value = name.evaluated_alias or name.evaluated_name - if name_value not in unused_imports and name_value not in added_names: - names_to_keep.append(name.with_changes(comma=cst.MaybeSentinel.DEFAULT)) - added_names.add(name_value) - if len(names_to_keep) > 0: - new_node = node.with_changes(body=[import_node.with_changes(names=names_to_keep)]) - imports_to_keep.append(new_node) + # Unwrap single-line statements (e.g., imports wrapped in SimpleStatementLine) + is_simple = isinstance(node, cst.SimpleStatementLine) + inner_stmt = node.body[0] if is_simple and len(node.body) > 0 else node + + # Handle imports with filtering + if isinstance(inner_stmt, (cst.Import, cst.ImportFrom)): + names_to_keep = [] + for name in inner_stmt.names: + name_value = name.evaluated_alias or name.evaluated_name + if name_value not in unused_imports and name_value not in added_names: + names_to_keep.append(name.with_changes(comma=cst.MaybeSentinel.DEFAULT)) + added_names.add(name_value) + if len(names_to_keep) > 0: + new_import = inner_stmt.with_changes(names=names_to_keep) + new_node = node.with_changes(body=[new_import]) if is_simple else new_import + imports_to_keep.append(new_node) + return + + # Handle assignment-like header statements (e.g., kernel setup) by preserving them as-is + if is_simple and isinstance(inner_stmt, (cst.Assign, cst.AnnAssign)): + imports_to_keep.append(node) + return def get_needed_imports(body: dict[str, dict], all_imports: list[cst.CSTNode]) -> list[cst.CSTNode]: From aca4ccd1063e9454d5e9a7b70ce624f8a9da0fd0 Mon Sep 17 00:00:00 2001 From: MekkCyber Date: Thu, 11 Sep 2025 12:38:37 +0000 Subject: [PATCH 07/12] modular fix + lazy loading --- .../configuration_falcon_mamba.py | 16 +----- .../falcon_mamba/modeling_falcon_mamba.py | 50 +++++++++++-------- .../falcon_mamba/modular_falcon_mamba.py | 25 ++++------ .../models/mamba/modeling_mamba.py | 35 ++++++++----- 4 files changed, 62 insertions(+), 64 deletions(-) diff --git a/src/transformers/models/falcon_mamba/configuration_falcon_mamba.py b/src/transformers/models/falcon_mamba/configuration_falcon_mamba.py index 65f0462383c6..7630ebd6343a 100644 --- a/src/transformers/models/falcon_mamba/configuration_falcon_mamba.py +++ b/src/transformers/models/falcon_mamba/configuration_falcon_mamba.py @@ -18,24 +18,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ import math from ...configuration_utils import PretrainedConfig -from ...utils.import_utils import is_causal_conv1d_available, is_kernels_available - - -if is_kernels_available(): - from kernels import get_kernel - - kernel_causal_conv1d = get_kernel("kernels-community/causal-conv1d") - causal_conv1d_update, causal_conv1d_fn = ( - kernel_causal_conv1d.causal_conv1d_update, - kernel_causal_conv1d.causal_conv1d_fn, - ) -elif is_causal_conv1d_available(): - from causal_conv1d import causal_conv1d_fn, causal_conv1d_update -else: - causal_conv1d_update, causal_conv1d_fn = None, None class FalconMambaConfig(PretrainedConfig): diff --git a/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py b/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py index 6b02b473b821..e608593d99c3 100644 --- a/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py +++ b/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py @@ -55,23 +55,6 @@ else: selective_state_update, selective_scan_fn, mamba_inner_fn = None, None, None -if is_kernels_available(): - from kernels import get_kernel - - kernel_causal_conv1d = get_kernel("kernels-community/causal-conv1d") - causal_conv1d_update, causal_conv1d_fn = ( - kernel_causal_conv1d.causal_conv1d_update, - kernel_causal_conv1d.causal_conv1d_fn, - ) -elif is_causal_conv1d_available(): - from causal_conv1d import causal_conv1d_fn, causal_conv1d_update -else: - causal_conv1d_update, causal_conv1d_fn = None, None -if is_causal_conv1d_available(): - from causal_conv1d import causal_conv1d_fn, causal_conv1d_update -else: - causal_conv1d_update, causal_conv1d_fn = None, None - logger = logging.get_logger(__name__) @@ -177,6 +160,28 @@ def reset(self): self.ssm_states[layer_idx].zero_() +def _lazy_load_causal_conv1d(): + global _causal_conv1d_cache + if _causal_conv1d_cache is not None: + return _causal_conv1d_cache + + if is_kernels_available(): + from kernels import get_kernel + + kernel_causal_conv1d = get_kernel("kernels-community/causal-conv1d") + _causal_conv1d_cache = (kernel_causal_conv1d.causal_conv1d_update, kernel_causal_conv1d.causal_conv1d_fn) + elif is_causal_conv1d_available(): + from causal_conv1d import causal_conv1d_fn, causal_conv1d_update + + _causal_conv1d_cache = (causal_conv1d_update, causal_conv1d_fn) + else: + _causal_conv1d_cache = (None, None) + return _causal_conv1d_cache + + +_causal_conv1d_cache = None + + def rms_forward(hidden_states, variance_epsilon=1e-6): """ Calculates simple RMSNorm with no learnable weights. `MambaRMSNorm` will @@ -256,6 +261,7 @@ def __init__(self, config: FalconMambaConfig, layer_idx: int): self.rms_eps = config.mixer_rms_eps def warn_slow_implementation(self): + causal_conv1d_update, causal_conv1d_fn = _lazy_load_causal_conv1d() is_fast_path_available = all( (selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn) ) @@ -264,8 +270,8 @@ def warn_slow_implementation(self): if is_mambapy_available(): logger.warning_once( "The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)`" - " is None. Falling back to the mamba.py backend. To install follow https://github.com/state-spaces/mamba/#installation and" - " https://github.com/Dao-AILab/causal-conv1d" + " is None. Falling back to the mamba.py backend. 
To install follow https://github.com/state-spaces/mamba/#installation for mamba-ssm and" + " https://github.com/Dao-AILab/causal-conv1d or install kernels for causal-conv1d" ) else: raise ImportError( @@ -274,8 +280,8 @@ def warn_slow_implementation(self): else: logger.warning_once( "The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)`" - " is None. Falling back to the sequential implementation of Mamba, as use_mambapy is set to False. To install follow https://github.com/state-spaces/mamba/#installation and" - " https://github.com/Dao-AILab/causal-conv1d. For the mamba.py backend, follow https://github.com/alxndrTL/mamba.py." + " is None. Falling back to the sequential implementation of Mamba, as use_mambapy is set to False. To install follow https://github.com/state-spaces/mamba/#installation for mamba-ssm and" + " https://github.com/Dao-AILab/causal-conv1d or install kernels for causal-conv1d. For the mamba.py backend, follow https://github.com/alxndrTL/mamba.py." ) def cuda_kernels_forward( @@ -310,6 +316,7 @@ def cuda_kernels_forward( ) else: + causal_conv1d_update, causal_conv1d_fn = _lazy_load_causal_conv1d() hidden_states, gate = projected_states.chunk(2, dim=1) if attention_mask is not None: @@ -504,6 +511,7 @@ def forward( cache_position: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.LongTensor] = None, ): + causal_conv1d_update, causal_conv1d_fn = _lazy_load_causal_conv1d() is_fast_path_available = all( (selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn) ) diff --git a/src/transformers/models/falcon_mamba/modular_falcon_mamba.py b/src/transformers/models/falcon_mamba/modular_falcon_mamba.py index 79116f0e22d7..3bbaeaa56af4 100644 --- a/src/transformers/models/falcon_mamba/modular_falcon_mamba.py +++ b/src/transformers/models/falcon_mamba/modular_falcon_mamba.py @@ -38,6 +38,7 @@ MambaOutput, MambaPreTrainedModel, MambaRMSNorm, + _lazy_load_causal_conv1d, ) @@ -56,18 +57,7 @@ else: selective_state_update, selective_scan_fn, mamba_inner_fn = None, None, None -if is_kernels_available(): - from kernels import get_kernel - - kernel_causal_conv1d = get_kernel("kernels-community/causal-conv1d") - causal_conv1d_update, causal_conv1d_fn = ( - kernel_causal_conv1d.causal_conv1d_update, - kernel_causal_conv1d.causal_conv1d_fn, - ) -elif is_causal_conv1d_available(): - from causal_conv1d import causal_conv1d_fn, causal_conv1d_update -else: - causal_conv1d_update, causal_conv1d_fn = None, None +_causal_conv1d_cache = None class FalconMambaConfig(MambaConfig): @@ -238,6 +228,7 @@ def rms_forward(hidden_states, variance_epsilon=1e-6): class FalconMambaMixer(MambaMixer): def warn_slow_implementation(self): + causal_conv1d_update, causal_conv1d_fn = _lazy_load_causal_conv1d() is_fast_path_available = all( (selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn) ) @@ -246,8 +237,8 @@ def warn_slow_implementation(self): if is_mambapy_available(): logger.warning_once( "The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)`" - " is None. Falling back to the mamba.py backend. To install follow https://github.com/state-spaces/mamba/#installation and" - " https://github.com/Dao-AILab/causal-conv1d" + " is None. Falling back to the mamba.py backend. 
To install follow https://github.com/state-spaces/mamba/#installation for mamba-ssm and" + " https://github.com/Dao-AILab/causal-conv1d or install kernels for causal-conv1d" ) else: raise ImportError( @@ -256,8 +247,8 @@ def warn_slow_implementation(self): else: logger.warning_once( "The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)`" - " is None. Falling back to the sequential implementation of Mamba, as use_mambapy is set to False. To install follow https://github.com/state-spaces/mamba/#installation and" - " https://github.com/Dao-AILab/causal-conv1d. For the mamba.py backend, follow https://github.com/alxndrTL/mamba.py." + " is None. Falling back to the sequential implementation of Mamba, as use_mambapy is set to False. To install follow https://github.com/state-spaces/mamba/#installation for mamba-ssm and" + " https://github.com/Dao-AILab/causal-conv1d or install kernels for causal-conv1d. For the mamba.py backend, follow https://github.com/alxndrTL/mamba.py." ) def __init__(self, config: FalconMambaConfig, layer_idx: int): @@ -303,6 +294,7 @@ def cuda_kernels_forward( ) else: + causal_conv1d_update, causal_conv1d_fn = _lazy_load_causal_conv1d() hidden_states, gate = projected_states.chunk(2, dim=1) if attention_mask is not None: @@ -496,6 +488,7 @@ def forward( cache_position: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.LongTensor] = None, ): + causal_conv1d_update, causal_conv1d_fn = _lazy_load_causal_conv1d() is_fast_path_available = all( (selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn) ) diff --git a/src/transformers/models/mamba/modeling_mamba.py b/src/transformers/models/mamba/modeling_mamba.py index 89eb6b704353..6c10f7cc87a9 100644 --- a/src/transformers/models/mamba/modeling_mamba.py +++ b/src/transformers/models/mamba/modeling_mamba.py @@ -55,18 +55,26 @@ else: selective_state_update, selective_scan_fn, mamba_inner_fn = None, None, None -if is_kernels_available(): - from kernels import get_kernel - - kernel_causal_conv1d = get_kernel("kernels-community/causal-conv1d") - causal_conv1d_update, causal_conv1d_fn = ( - kernel_causal_conv1d.causal_conv1d_update, - kernel_causal_conv1d.causal_conv1d_fn, - ) -elif is_causal_conv1d_available(): - from causal_conv1d import causal_conv1d_fn, causal_conv1d_update -else: - causal_conv1d_update, causal_conv1d_fn = None, None +_causal_conv1d_cache = None + + +def _lazy_load_causal_conv1d(): + global _causal_conv1d_cache + if _causal_conv1d_cache is not None: + return _causal_conv1d_cache + + if is_kernels_available(): + from kernels import get_kernel + + kernel_causal_conv1d = get_kernel("kernels-community/causal-conv1d") + _causal_conv1d_cache = (kernel_causal_conv1d.causal_conv1d_update, kernel_causal_conv1d.causal_conv1d_fn) + elif is_causal_conv1d_available(): + from causal_conv1d import causal_conv1d_fn, causal_conv1d_update + + _causal_conv1d_cache = (causal_conv1d_update, causal_conv1d_fn) + else: + _causal_conv1d_cache = (None, None) + return _causal_conv1d_cache class MambaCache: @@ -222,6 +230,7 @@ def __init__(self, config: MambaConfig, layer_idx: int): self.warn_slow_implementation() def warn_slow_implementation(self): + causal_conv1d_update, causal_conv1d_fn = _lazy_load_causal_conv1d() is_fast_path_available = all( (selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn) ) @@ -272,6 +281,7 @@ def 
cuda_kernels_forward( ) else: + causal_conv1d_update, causal_conv1d_fn = _lazy_load_causal_conv1d() hidden_states, gate = projected_states.chunk(2, dim=1) if attention_mask is not None: @@ -435,6 +445,7 @@ def forward( cache_position: Optional[torch.LongTensor] = None, attention_mask: Optional[torch.LongTensor] = None, ): + causal_conv1d_update, causal_conv1d_fn = _lazy_load_causal_conv1d() is_fast_path_available = all( (selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn) ) From 487d9725bfe92b47ae7741586417d162e442b900 Mon Sep 17 00:00:00 2001 From: MekkCyber Date: Thu, 11 Sep 2025 12:46:44 +0000 Subject: [PATCH 08/12] revert changes modular --- utils/modular_model_converter.py | 34 +++++++++++--------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/utils/modular_model_converter.py b/utils/modular_model_converter.py index 99ed8c72ba3a..dc4b53bf3193 100644 --- a/utils/modular_model_converter.py +++ b/utils/modular_model_converter.py @@ -1101,28 +1101,16 @@ def append_new_import_node( ): """Insert the new `node` to the list of `imports_to_keep` in-place, if it is not part of the `unused_imports` or `added_names`. Also modifies `added_names` in-place accordingly.""" - # Unwrap single-line statements (e.g., imports wrapped in SimpleStatementLine) - is_simple = isinstance(node, cst.SimpleStatementLine) - inner_stmt = node.body[0] if is_simple and len(node.body) > 0 else node - - # Handle imports with filtering - if isinstance(inner_stmt, (cst.Import, cst.ImportFrom)): - names_to_keep = [] - for name in inner_stmt.names: - name_value = name.evaluated_alias or name.evaluated_name - if name_value not in unused_imports and name_value not in added_names: - names_to_keep.append(name.with_changes(comma=cst.MaybeSentinel.DEFAULT)) - added_names.add(name_value) - if len(names_to_keep) > 0: - new_import = inner_stmt.with_changes(names=names_to_keep) - new_node = node.with_changes(body=[new_import]) if is_simple else new_import - imports_to_keep.append(new_node) - return - - # Handle assignment-like header statements (e.g., kernel setup) by preserving them as-is - if is_simple and isinstance(inner_stmt, (cst.Assign, cst.AnnAssign)): - imports_to_keep.append(node) - return + import_node = node.body[0] + names_to_keep = [] + for name in import_node.names: + name_value = name.evaluated_alias or name.evaluated_name + if name_value not in unused_imports and name_value not in added_names: + names_to_keep.append(name.with_changes(comma=cst.MaybeSentinel.DEFAULT)) + added_names.add(name_value) + if len(names_to_keep) > 0: + new_node = node.with_changes(body=[import_node.with_changes(names=names_to_keep)]) + imports_to_keep.append(new_node) def get_needed_imports(body: dict[str, dict], all_imports: list[cst.CSTNode]) -> list[cst.CSTNode]: @@ -1797,4 +1785,4 @@ def run_converter(modular_file: str): # Process files with diff workers = min(num_workers, len(dependency_level_files)) with mp.Pool(workers) as pool: - pool.map(run_converter, dependency_level_files) + pool.map(run_converter, dependency_level_files) \ No newline at end of file From 6d022ae124fc84adf1c93e4cc8b69f71269c22c4 Mon Sep 17 00:00:00 2001 From: MekkCyber Date: Thu, 11 Sep 2025 12:47:29 +0000 Subject: [PATCH 09/12] nit --- utils/modular_model_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/modular_model_converter.py b/utils/modular_model_converter.py index dc4b53bf3193..54520b6aed0d 100644 --- a/utils/modular_model_converter.py +++ 
b/utils/modular_model_converter.py @@ -1785,4 +1785,4 @@ def run_converter(modular_file: str): # Process files with diff workers = min(num_workers, len(dependency_level_files)) with mp.Pool(workers) as pool: - pool.map(run_converter, dependency_level_files) \ No newline at end of file + pool.map(run_converter, dependency_level_files) From 2b930935a733471814cd4afa25c6d929381691ef Mon Sep 17 00:00:00 2001 From: MekkCyber Date: Thu, 11 Sep 2025 12:59:20 +0000 Subject: [PATCH 10/12] hub kernels update --- src/transformers/integrations/__init__.py | 2 ++ src/transformers/integrations/hub_kernels.py | 6 ++++++ .../models/falcon_mamba/modeling_falcon_mamba.py | 6 ++---- .../models/falcon_mamba/modular_falcon_mamba.py | 2 -- src/transformers/models/mamba/modeling_mamba.py | 6 ++---- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/transformers/integrations/__init__.py b/src/transformers/integrations/__init__.py index 58ca68bb3326..9ff6ec79961b 100755 --- a/src/transformers/integrations/__init__.py +++ b/src/transformers/integrations/__init__.py @@ -72,6 +72,7 @@ "hqq": ["prepare_for_hqq_linear"], "hub_kernels": [ "LayerRepository", + "_lazy_loading_kernel", "register_kernel_mapping", "replace_kernel_forward_from_hub", "use_kernel_forward_from_hub", @@ -217,6 +218,7 @@ from .hqq import prepare_for_hqq_linear from .hub_kernels import ( LayerRepository, + _lazy_loading_kernel, register_kernel_mapping, replace_kernel_forward_from_hub, use_kernel_forward_from_hub, diff --git a/src/transformers/integrations/hub_kernels.py b/src/transformers/integrations/hub_kernels.py index 248b6b1b0b9d..327c129c3b34 100644 --- a/src/transformers/integrations/hub_kernels.py +++ b/src/transformers/integrations/hub_kernels.py @@ -169,9 +169,15 @@ def load_and_register_kernel(attn_implementation: str) -> None: ALL_MASK_ATTENTION_FUNCTIONS.register(attn_implementation, ALL_MASK_ATTENTION_FUNCTIONS["flash_attention_2"]) +def _lazy_loading_kernel(kernel_name: str) -> None: + kernel = get_kernel(kernel_name) + return kernel + + __all__ = [ "LayerRepository", "use_kernel_forward_from_hub", "register_kernel_mapping", "replace_kernel_forward_from_hub", + "_lazy_loading_kernel", ] diff --git a/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py b/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py index e608593d99c3..32bbcee5a8c9 100644 --- a/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py +++ b/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py @@ -30,6 +30,7 @@ from ...activations import ACT2FN from ...configuration_utils import PretrainedConfig from ...generation import GenerationMixin +from ...integrations import _lazy_loading_kernel from ...modeling_layers import GradientCheckpointingLayer from ...modeling_utils import PreTrainedModel from ...utils import ModelOutput, auto_docstring, logging @@ -166,10 +167,7 @@ def _lazy_load_causal_conv1d(): return _causal_conv1d_cache if is_kernels_available(): - from kernels import get_kernel - - kernel_causal_conv1d = get_kernel("kernels-community/causal-conv1d") - _causal_conv1d_cache = (kernel_causal_conv1d.causal_conv1d_update, kernel_causal_conv1d.causal_conv1d_fn) + _lazy_loading_kernel("kernels-community/causal-conv1d") elif is_causal_conv1d_available(): from causal_conv1d import causal_conv1d_fn, causal_conv1d_update diff --git a/src/transformers/models/falcon_mamba/modular_falcon_mamba.py b/src/transformers/models/falcon_mamba/modular_falcon_mamba.py index 3bbaeaa56af4..a84b95debf5a 100644 --- 
a/src/transformers/models/falcon_mamba/modular_falcon_mamba.py +++ b/src/transformers/models/falcon_mamba/modular_falcon_mamba.py @@ -22,8 +22,6 @@ from ...utils import auto_docstring, logging from ...utils.import_utils import ( - is_causal_conv1d_available, - is_kernels_available, is_mamba_ssm_available, is_mambapy_available, ) diff --git a/src/transformers/models/mamba/modeling_mamba.py b/src/transformers/models/mamba/modeling_mamba.py index 6c10f7cc87a9..45c328d1a1b9 100644 --- a/src/transformers/models/mamba/modeling_mamba.py +++ b/src/transformers/models/mamba/modeling_mamba.py @@ -26,6 +26,7 @@ from ...activations import ACT2FN from ...configuration_utils import PretrainedConfig from ...generation import GenerationMixin +from ...integrations import _lazy_loading_kernel from ...modeling_layers import GradientCheckpointingLayer from ...modeling_utils import PreTrainedModel from ...utils import ( @@ -64,10 +65,7 @@ def _lazy_load_causal_conv1d(): return _causal_conv1d_cache if is_kernels_available(): - from kernels import get_kernel - - kernel_causal_conv1d = get_kernel("kernels-community/causal-conv1d") - _causal_conv1d_cache = (kernel_causal_conv1d.causal_conv1d_update, kernel_causal_conv1d.causal_conv1d_fn) + _lazy_loading_kernel("kernels-community/causal-conv1d") elif is_causal_conv1d_available(): from causal_conv1d import causal_conv1d_fn, causal_conv1d_update From 831847874bbc67d0955ee85fec571c4159aa1cc2 Mon Sep 17 00:00:00 2001 From: MekkCyber Date: Fri, 12 Sep 2025 09:31:15 +0000 Subject: [PATCH 11/12] update --- src/transformers/integrations/__init__.py | 2 -- src/transformers/integrations/hub_kernels.py | 6 ------ .../models/falcon_mamba/modeling_falcon_mamba.py | 6 ++++-- src/transformers/models/mamba/modeling_mamba.py | 6 ++++-- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/transformers/integrations/__init__.py b/src/transformers/integrations/__init__.py index 9ff6ec79961b..58ca68bb3326 100755 --- a/src/transformers/integrations/__init__.py +++ b/src/transformers/integrations/__init__.py @@ -72,7 +72,6 @@ "hqq": ["prepare_for_hqq_linear"], "hub_kernels": [ "LayerRepository", - "_lazy_loading_kernel", "register_kernel_mapping", "replace_kernel_forward_from_hub", "use_kernel_forward_from_hub", @@ -218,7 +217,6 @@ from .hqq import prepare_for_hqq_linear from .hub_kernels import ( LayerRepository, - _lazy_loading_kernel, register_kernel_mapping, replace_kernel_forward_from_hub, use_kernel_forward_from_hub, diff --git a/src/transformers/integrations/hub_kernels.py b/src/transformers/integrations/hub_kernels.py index 327c129c3b34..248b6b1b0b9d 100644 --- a/src/transformers/integrations/hub_kernels.py +++ b/src/transformers/integrations/hub_kernels.py @@ -169,15 +169,9 @@ def load_and_register_kernel(attn_implementation: str) -> None: ALL_MASK_ATTENTION_FUNCTIONS.register(attn_implementation, ALL_MASK_ATTENTION_FUNCTIONS["flash_attention_2"]) -def _lazy_loading_kernel(kernel_name: str) -> None: - kernel = get_kernel(kernel_name) - return kernel - - __all__ = [ "LayerRepository", "use_kernel_forward_from_hub", "register_kernel_mapping", "replace_kernel_forward_from_hub", - "_lazy_loading_kernel", ] diff --git a/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py b/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py index 32bbcee5a8c9..2dc137d614f8 100644 --- a/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py +++ b/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py @@ -30,7 +30,6 @@ from ...activations import ACT2FN 
from ...configuration_utils import PretrainedConfig from ...generation import GenerationMixin -from ...integrations import _lazy_loading_kernel from ...modeling_layers import GradientCheckpointingLayer from ...modeling_utils import PreTrainedModel from ...utils import ModelOutput, auto_docstring, logging @@ -167,7 +166,10 @@ def _lazy_load_causal_conv1d(): return _causal_conv1d_cache if is_kernels_available(): - _lazy_loading_kernel("kernels-community/causal-conv1d") + from kernels import get_kernel + + _causal_conv1d_kernel = get_kernel("kernels-community/causal-conv1d") + _causal_conv1d_cache = (_causal_conv1d_kernel.causal_conv1d_update, _causal_conv1d_kernel.causal_conv1d_fn) elif is_causal_conv1d_available(): from causal_conv1d import causal_conv1d_fn, causal_conv1d_update diff --git a/src/transformers/models/mamba/modeling_mamba.py b/src/transformers/models/mamba/modeling_mamba.py index 45c328d1a1b9..9cdc63a9943a 100644 --- a/src/transformers/models/mamba/modeling_mamba.py +++ b/src/transformers/models/mamba/modeling_mamba.py @@ -26,7 +26,6 @@ from ...activations import ACT2FN from ...configuration_utils import PretrainedConfig from ...generation import GenerationMixin -from ...integrations import _lazy_loading_kernel from ...modeling_layers import GradientCheckpointingLayer from ...modeling_utils import PreTrainedModel from ...utils import ( @@ -65,7 +64,10 @@ def _lazy_load_causal_conv1d(): return _causal_conv1d_cache if is_kernels_available(): - _lazy_loading_kernel("kernels-community/causal-conv1d") + from kernels import get_kernel + + _causal_conv1d_kernel = get_kernel("kernels-community/causal-conv1d") + _causal_conv1d_cache = (_causal_conv1d_kernel.causal_conv1d_update, _causal_conv1d_kernel.causal_conv1d_fn) elif is_causal_conv1d_available(): from causal_conv1d import causal_conv1d_fn, causal_conv1d_update From f297b9f68567b1114711875a2ab966f34e239a2d Mon Sep 17 00:00:00 2001 From: MekkCyber Date: Fri, 12 Sep 2025 09:35:36 +0000 Subject: [PATCH 12/12] small nit --- src/transformers/models/falcon_mamba/modeling_falcon_mamba.py | 4 ++-- src/transformers/models/falcon_mamba/modular_falcon_mamba.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py b/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py index 2dc137d614f8..1c832e84932f 100644 --- a/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py +++ b/src/transformers/models/falcon_mamba/modeling_falcon_mamba.py @@ -271,7 +271,7 @@ def warn_slow_implementation(self): logger.warning_once( "The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)`" " is None. Falling back to the mamba.py backend. To install follow https://github.com/state-spaces/mamba/#installation for mamba-ssm and" - " https://github.com/Dao-AILab/causal-conv1d or install kernels for causal-conv1d" + " https://github.com/Dao-AILab/causal-conv1d or `pip install kernels` for causal-conv1d" ) else: raise ImportError( @@ -281,7 +281,7 @@ def warn_slow_implementation(self): logger.warning_once( "The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)`" " is None. Falling back to the sequential implementation of Mamba, as use_mambapy is set to False. 
To install follow https://github.com/state-spaces/mamba/#installation for mamba-ssm and" - " https://github.com/Dao-AILab/causal-conv1d or install kernels for causal-conv1d. For the mamba.py backend, follow https://github.com/alxndrTL/mamba.py." + " https://github.com/Dao-AILab/causal-conv1d or `pip install kernels` for causal-conv1d. For the mamba.py backend, follow https://github.com/alxndrTL/mamba.py." ) def cuda_kernels_forward( diff --git a/src/transformers/models/falcon_mamba/modular_falcon_mamba.py b/src/transformers/models/falcon_mamba/modular_falcon_mamba.py index a84b95debf5a..7534d1b6c68a 100644 --- a/src/transformers/models/falcon_mamba/modular_falcon_mamba.py +++ b/src/transformers/models/falcon_mamba/modular_falcon_mamba.py @@ -236,7 +236,7 @@ def warn_slow_implementation(self): logger.warning_once( "The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)`" " is None. Falling back to the mamba.py backend. To install follow https://github.com/state-spaces/mamba/#installation for mamba-ssm and" - " https://github.com/Dao-AILab/causal-conv1d or install kernels for causal-conv1d" + " https://github.com/Dao-AILab/causal-conv1d or `pip install kernels` for causal-conv1d" ) else: raise ImportError( @@ -246,7 +246,7 @@ def warn_slow_implementation(self): logger.warning_once( "The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)`" " is None. Falling back to the sequential implementation of Mamba, as use_mambapy is set to False. To install follow https://github.com/state-spaces/mamba/#installation for mamba-ssm and" - " https://github.com/Dao-AILab/causal-conv1d or install kernels for causal-conv1d. For the mamba.py backend, follow https://github.com/alxndrTL/mamba.py." + " https://github.com/Dao-AILab/causal-conv1d or `pip install kernels` for causal-conv1d. For the mamba.py backend, follow https://github.com/alxndrTL/mamba.py." ) def __init__(self, config: FalconMambaConfig, layer_idx: int):
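
Note: the series converges on one lazy-loading pattern, duplicated in modeling_mamba.py and modeling_falcon_mamba.py after patch 11 reverts the shared `_lazy_loading_kernel` integrations helper. Below is a minimal self-contained sketch of that final (patch 12) state, not part of the patches above; the `is_kernels_available` / `is_causal_conv1d_available` guards are the existing transformers import utilities the diffs already use, and everything else is illustrative.

# Sketch of the final lazy-loading pattern; assumes the transformers import
# guards are available at this absolute path (the patches import them
# relatively as ...utils.import_utils).
from transformers.utils.import_utils import (
    is_causal_conv1d_available,
    is_kernels_available,
)

# (causal_conv1d_update, causal_conv1d_fn), resolved once and then cached.
_causal_conv1d_cache = None


def _lazy_load_causal_conv1d():
    """Resolve the conv1d backend on first use rather than at import time."""
    global _causal_conv1d_cache
    if _causal_conv1d_cache is not None:
        return _causal_conv1d_cache

    if is_kernels_available():
        # Hub kernel path: the download/compile cost is paid on the first
        # forward pass instead of when transformers is imported.
        from kernels import get_kernel

        k = get_kernel("kernels-community/causal-conv1d")
        _causal_conv1d_cache = (k.causal_conv1d_update, k.causal_conv1d_fn)
    elif is_causal_conv1d_available():
        # Fall back to the locally installed causal-conv1d package.
        from causal_conv1d import causal_conv1d_fn, causal_conv1d_update

        _causal_conv1d_cache = (causal_conv1d_update, causal_conv1d_fn)
    else:
        # Neither backend installed: callers detect (None, None) and take
        # the slow sequential (or mamba.py) path.
        _causal_conv1d_cache = (None, None)
    return _causal_conv1d_cache

Call sites unpack the pair at entry, e.g. `causal_conv1d_update, causal_conv1d_fn = _lazy_load_causal_conv1d()` at the top of `forward()`, `cuda_kernels_forward()` and `warn_slow_implementation()`, which is why the module-level `causal_conv1d_*` globals introduced in patch 01 disappear again in patch 07.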