diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1f1db341c82de9..4bf26ab77bb746 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -89,7 +89,7 @@ repos: # | python/paddle/j.+ - # | python/paddle/[k-n].+ + | python/paddle/[k-n].+ # | python/paddle/[o-t].+ @@ -145,7 +145,7 @@ repos: | python/paddle/j.+ - | python/paddle/[k-n].+ + # | python/paddle/[k-n].+ | python/paddle/[o-t].+ diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py index c3ddf5f8dd7973..863a2c7e47ea65 100644 --- a/python/paddle/nn/functional/activation.py +++ b/python/paddle/nn/functional/activation.py @@ -603,9 +603,9 @@ def prelu( [-1.25000000, 6. , 7. , -2. ], [ 6. , 7. , 8. , 9. ]]]]) """ - assert ( - len(weight.shape) == 0 or len(weight.shape) == 1 - ), "The dim count of weight shape should be 0 or 1 in prelu()." + assert len(weight.shape) == 0 or len(weight.shape) == 1, ( + "The dim count of weight shape should be 0 or 1 in prelu()." + ) mode = 'all' if len(weight.shape) == 1 and weight.shape[0] > 1: @@ -626,19 +626,19 @@ def prelu( data_format = 'NCHW' if data_format[1] == 'C' else 'NHWC' - assert ( - len(x.shape) > 1 - ), "The dim count of x should be equal or larger than 2 in prelu() when weight shape is not [1]." + assert len(x.shape) > 1, ( + "The dim count of x should be equal or larger than 2 in prelu() when weight shape is not [1]." + ) # NOTE(GuoxiaWang): support NHWC data format if data_format == 'NHWC': - assert ( - weight.shape[0] == x.shape[-1] - ), "The weight size should be equal to x input channel in prelu() when weight shape is not [1]." + assert weight.shape[0] == x.shape[-1], ( + "The weight size should be equal to x input channel in prelu() when weight shape is not [1]." + ) else: - assert ( - weight.shape[0] == x.shape[1] - ), "The weight size should be equal to x input channel in prelu() when weight shape is not [1]." + assert weight.shape[0] == x.shape[1], ( + "The weight size should be equal to x input channel in prelu() when weight shape is not [1]." + ) mode = 'channel' if in_dynamic_or_pir_mode(): diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 02575f0e4fa4cb..7f2e3d0ccbc1c5 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -601,9 +601,9 @@ def _is_list_or_tuple_(data): if isinstance(dim_size, (Variable, paddle.pir.Value)): contain_var = True continue - assert ( - dim_size > 0 - ), "Each dimension size given in out_shape must be greater than 0." + assert dim_size > 0, ( + "Each dimension size given in out_shape must be greater than 0." + ) if contain_var: new_size_tensor = [] @@ -2068,7 +2068,9 @@ def pad( 'replicate', 'constant', 'circular', - ], f"mode should be one of constant, reflect, replicate, circular, but got {mode}." + ], ( + f"mode should be one of constant, reflect, replicate, circular, but got {mode}." 
+ ) x_dim = len(x.shape) if in_dynamic_mode(): @@ -2162,9 +2164,9 @@ def pad( 4: ["NCHW", "NHWC"], 5: ["NCDHW", "NDHWC"], } - assert ( - data_format in supported_format_map[x_dim] - ), f"input tensor dimension is {x_dim}, it's data format should be in {supported_format_map[x_dim]} but got {data_format}" + assert data_format in supported_format_map[x_dim], ( + f"input tensor dimension is {x_dim}, it's data format should be in {supported_format_map[x_dim]} but got {data_format}" + ) unsqueezed_dim = [] @@ -2831,9 +2833,9 @@ def fold( ) assert len(x.shape) == 3, "input should be the format of [N, C, L]" - assert ( - math.prod(x.shape) >= 0 - ), "The number of elements must greater or equal than zero." + assert math.prod(x.shape) >= 0, ( + "The number of elements must greater or equal than zero." + ) def _is_list_or_tuple_(data): return isinstance(data, (list, tuple)) @@ -2841,30 +2843,30 @@ def _is_list_or_tuple_(data): if isinstance(output_sizes, int): output_sizes = [output_sizes, output_sizes] else: - assert _is_list_or_tuple_(output_sizes) and ( - len(output_sizes) == 2 - ), "output_sizes should either be an integer or a list/tuple of two integers" + assert _is_list_or_tuple_(output_sizes) and (len(output_sizes) == 2), ( + "output_sizes should either be an integer or a list/tuple of two integers" + ) if isinstance(kernel_sizes, int): kernel_sizes = [kernel_sizes, kernel_sizes] else: - assert _is_list_or_tuple_(kernel_sizes) and ( - len(kernel_sizes) == 2 - ), "kernel_sizes should either be an integer or a list/tuple of two integers" + assert _is_list_or_tuple_(kernel_sizes) and (len(kernel_sizes) == 2), ( + "kernel_sizes should either be an integer or a list/tuple of two integers" + ) if isinstance(strides, int): strides = [strides, strides] else: - assert _is_list_or_tuple_(strides) and ( - len(strides) == 2 - ), "strides should either be an integer or a list/tuple of two integers" + assert _is_list_or_tuple_(strides) and (len(strides) == 2), ( + "strides should either be an integer or a list/tuple of two integers" + ) if isinstance(dilations, int): dilations = [dilations, dilations] else: - assert _is_list_or_tuple_(dilations) and ( - len(dilations) == 2 - ), "dilations should either be an integer or a list/tuple of two integers" + assert _is_list_or_tuple_(dilations) and (len(dilations) == 2), ( + "dilations should either be an integer or a list/tuple of two integers" + ) if isinstance(paddings, int): paddings = [paddings] * 4 diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 121da930dc3c40..6d6b9bd3bdd531 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -272,9 +272,9 @@ def _conv_nd( attrs={'axis': -1}, ) else: - assert len(x_shape) > len( - y_shape - ), 'The length of pre_bias must greater than the length of bias' + assert len(x_shape) > len(y_shape), ( + 'The length of pre_bias must greater than the length of bias' + ) padding = len(x_shape) - len(y_shape) - channel_dim bias = reshape( bias, [1] * channel_dim + y_shape + [1] * padding @@ -1336,9 +1336,9 @@ def conv2d_transpose( attrs={'axis': -1}, ) else: - assert len(x_shape) > len( - y_shape - ), 'The length of pre_bias must greater than the length of bias' + assert len(x_shape) > len(y_shape), ( + 'The length of pre_bias must greater than the length of bias' + ) padding = len(x_shape) - len(y_shape) - channel_dim bias = reshape( bias, [1] * channel_dim + y_shape + [1] * padding diff --git a/python/paddle/nn/functional/flash_attention.py 
b/python/paddle/nn/functional/flash_attention.py index 4a7ab07cef44e6..c6f2856e228218 100644 --- a/python/paddle/nn/functional/flash_attention.py +++ b/python/paddle/nn/functional/flash_attention.py @@ -508,30 +508,30 @@ def flash_attention( fa_version = paddle.base.framework.get_flags( ["FLAGS_flash_attn_version"] )["FLAGS_flash_attn_version"] - assert ( - in_dynamic_or_pir_mode() or fa_version == 2 - ), "flash attention 3 only support dynamic or pir mode" - assert ( - dropout == 0.0 or fa_version == 2 - ), "flash attention 3 does not support dropout" - assert ( - not return_softmax or fa_version == 2 - ), "flash attention 3 does not support return softmax" - assert ( - fixed_seed_offset is None or fa_version == 2 - ), "flash attention 3 does not support return softmax" - assert ( - rng_name == "" or fa_version == 2 - ), "flash attention 3 does not support setting rng_name" - assert ( - training or fa_version == 2 - ), "flash attention 3 does not support setting training" - assert ( - name is None or fa_version == 2 - ), "flash attention 3 does not support setting name" - assert ( - softmax_scale is None or fa_version == 3 - ), "flash attention 2 does not support setting softmax_scale" + assert in_dynamic_or_pir_mode() or fa_version == 2, ( + "flash attention 3 only support dynamic or pir mode" + ) + assert dropout == 0.0 or fa_version == 2, ( + "flash attention 3 does not support dropout" + ) + assert not return_softmax or fa_version == 2, ( + "flash attention 3 does not support return softmax" + ) + assert fixed_seed_offset is None or fa_version == 2, ( + "flash attention 3 does not support return softmax" + ) + assert rng_name == "" or fa_version == 2, ( + "flash attention 3 does not support setting rng_name" + ) + assert training or fa_version == 2, ( + "flash attention 3 does not support setting training" + ) + assert name is None or fa_version == 2, ( + "flash attention 3 does not support setting name" + ) + assert softmax_scale is None or fa_version == 3, ( + "flash attention 2 does not support setting softmax_scale" + ) if in_dynamic_or_pir_mode(): if fa_version == 2: (result_attention, result_softmax, _, _) = _C_ops.flash_attn( @@ -1142,9 +1142,9 @@ def flash_attn_varlen_func( >>> output = paddle.nn.functional.flash_attention.flash_attention_v3_varlen(q, q, q, cu_seqlens_q, cu_seqlens_q, max_seqlen_q=max_seq_len_q, max_seqlen_k=max_seq_len_q, causal=True) >>> # doctest: -SKIP """ - assert ( - "xpu" not in paddle.get_device() - ), "flash_attn_varlen_func is not supported on xpu" + assert "xpu" not in paddle.get_device(), ( + "flash_attn_varlen_func is not supported on xpu" + ) assert not paddle.get_flags(["FLAGS_cudnn_deterministic"])[ "FLAGS_cudnn_deterministic" @@ -1157,9 +1157,9 @@ def flash_attn_varlen_func( == 3 ), "FLAGS_flash_attn_version is 2, conflicts with flash_attn_varlen_func" - assert ( - in_dynamic_or_pir_mode() - ), "flash_attn_varlen_func only support dynamic or pir mode" + assert in_dynamic_or_pir_mode(), ( + "flash_attn_varlen_func only support dynamic or pir mode" + ) assert qv is None, "flash_attn_varlen_func does not support setting qv" @@ -2203,9 +2203,9 @@ def flashmask_attention( window_size = (window_size, window_size) sq = query.shape[1] bsz = query.shape[0] - assert ( - startend_row_indices is None - ), "can't use window_size with startend_row_indices" + assert startend_row_indices is None, ( + "can't use window_size with startend_row_indices" + ) if causal: startend_row_indices = paddle.arange( window_size[0] + 1, sq + window_size[0] + 1, dtype="int32" 
@@ -2246,24 +2246,26 @@ def flashmask_attention( ) else: - assert ( - startend_row_indices.dtype == paddle.int32 - ), f"startend_row_indices.dtype must be paddle.int32, but got {startend_row_indices.dtype}" - assert ( - len(startend_row_indices.shape) == 4 - ), f"startend_row_indices rank must be 4,but got {startend_row_indices.shape}" - - assert ( - startend_row_indices.shape[0] == key.shape[0] - ), f"startend_row_indices.shape[0] must be equal to batch_size, but got {startend_row_indices.shape[0]} and {key.shape[0]}" - - assert ( - startend_row_indices.shape[2] == key.shape[1] - ), f"startend_row_indices.shape[2] must be equal to seqlen_k, but got {startend_row_indices.shape[2]} and {key.shape[2]}" + assert startend_row_indices.dtype == paddle.int32, ( + f"startend_row_indices.dtype must be paddle.int32, but got {startend_row_indices.dtype}" + ) + assert len(startend_row_indices.shape) == 4, ( + f"startend_row_indices rank must be 4,but got {startend_row_indices.shape}" + ) + + assert startend_row_indices.shape[0] == key.shape[0], ( + f"startend_row_indices.shape[0] must be equal to batch_size, but got {startend_row_indices.shape[0]} and {key.shape[0]}" + ) + + assert startend_row_indices.shape[2] == key.shape[1], ( + f"startend_row_indices.shape[2] must be equal to seqlen_k, but got {startend_row_indices.shape[2]} and {key.shape[2]}" + ) assert startend_row_indices.shape[1] in [ 1, key.shape[2], - ], "startend_row_indices head_num must be equal to 1(broadcast) or head_num_k." + ], ( + "startend_row_indices head_num must be equal to 1(broadcast) or head_num_k." + ) if causal: if startend_row_indices.shape[-1] == 1: @@ -2383,9 +2385,9 @@ def calc_reduced_attention_scores( >>> ) >>> # doctest: -SKIP """ - assert ( - query.stop_gradient and key.stop_gradient - ), 'calc_reduced_attention_scores() is for inference only.' + assert query.stop_gradient and key.stop_gradient, ( + 'calc_reduced_attention_scores() is for inference only.' + ) if in_dynamic_or_pir_mode(): reduced_scores = _C_ops.calc_reduced_attn_scores( diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index 907394d96b4179..b6e484aded5924 100644 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -94,9 +94,9 @@ def dice_loss( """ assert input.dtype in (paddle.float32, paddle.float64) assert label.dtype in (paddle.int32, paddle.int64) - assert ( - len(input.shape) >= 2 - ), "The rank of input should be greater than or equal to 2." + assert len(input.shape) >= 2, ( + "The rank of input should be greater than or equal to 2." + ) assert len(input.shape) == len(label.shape), ( "The rank of input and label should be equal, " f"but received input: {len(input.shape)}, label: {len(label.shape)}." @@ -105,9 +105,9 @@ def dice_loss( "The last dimension of label should be 1, " f"but received {label.shape[-1]}." ) - assert ( - input.shape[:-1] == label.shape[:-1] - ), "All dimensions should be equal except the last one." + assert input.shape[:-1] == label.shape[:-1], ( + "All dimensions should be equal except the last one." 
+ ) label = paddle.squeeze(label, [-1]) label = paddle.nn.functional.one_hot(label, input.shape[-1]) diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index ede06a5a91331b..860915efc1078f 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -704,9 +704,9 @@ def max_pool1d( def _unpool_output_size(x, kernel_size, stride, padding, output_size): - assert output_size is None or isinstance( - output_size, (list, tuple) - ), f"Required output_size is None|list|tuple, but received {output_size}" + assert output_size is None or isinstance(output_size, (list, tuple)), ( + f"Required output_size is None|list|tuple, but received {output_size}" + ) input_size = x.shape default_size = [] for d in range(len(kernel_size)): diff --git a/python/paddle/nn/initializer/bilinear.py b/python/paddle/nn/initializer/bilinear.py index 3ee5814e92115b..7253970871a025 100644 --- a/python/paddle/nn/initializer/bilinear.py +++ b/python/paddle/nn/initializer/bilinear.py @@ -96,7 +96,9 @@ def forward( """ assert not ( isinstance(var, framework.EagerParamBase) and var.is_dist() - ), "Currently, Bilinear initializer not support lazy init for dist param." + ), ( + "Currently, Bilinear initializer not support lazy init for dist param." + ) block = self._check_block(block) if not isinstance(var, (framework.Variable, pir.core.ParameterMeta)): diff --git a/python/paddle/nn/initializer/dirac.py b/python/paddle/nn/initializer/dirac.py index 82b8e511a6eb61..374a0b756df420 100644 --- a/python/paddle/nn/initializer/dirac.py +++ b/python/paddle/nn/initializer/dirac.py @@ -91,9 +91,9 @@ class Dirac(Initializer): """ def __init__(self, groups: int = 1, name: str | None = None) -> None: - assert groups > 0 and isinstance( - groups, int - ), " 'groups' must be a positive integer. " + assert groups > 0 and isinstance(groups, int), ( + " 'groups' must be a positive integer. " + ) super().__init__() self._groups = groups @@ -127,9 +127,9 @@ def __call__( 4, 5, ], "Only Tensor with 3/4/5 dimensions can be initialized by Dirac" - assert ( - var.shape[0] % self._groups - ) == 0, "Tensor 0-dimension must be divisible by groups" + assert (var.shape[0] % self._groups) == 0, ( + "Tensor 0-dimension must be divisible by groups" + ) if framework.in_pir_mode(): if var.dtype != core.DataType.FLOAT32: diff --git a/python/paddle/nn/initializer/kaiming.py b/python/paddle/nn/initializer/kaiming.py index a53f6bcf0340a7..2df53506c32c9b 100644 --- a/python/paddle/nn/initializer/kaiming.py +++ b/python/paddle/nn/initializer/kaiming.py @@ -114,7 +114,9 @@ def forward( """ assert not ( isinstance(var, framework.EagerParamBase) and var.is_dist() - ), "Currently, kaiming initializer not support lazy init for dist param." + ), ( + "Currently, kaiming initializer not support lazy init for dist param." + ) block = self._check_block(block) assert isinstance( var, diff --git a/python/paddle/nn/initializer/lazy_init.py b/python/paddle/nn/initializer/lazy_init.py index a6be4c4d168650..97a4d623145f63 100644 --- a/python/paddle/nn/initializer/lazy_init.py +++ b/python/paddle/nn/initializer/lazy_init.py @@ -44,9 +44,9 @@ def enable(self): """ if self._state: return - assert ( - framework.in_dygraph_mode() - ), "LazyInit.enable() is only available in dygraph mode." + assert framework.in_dygraph_mode(), ( + "LazyInit.enable() is only available in dygraph mode." 
+ ) self._state = True def disable(self): diff --git a/python/paddle/nn/initializer/orthogonal.py b/python/paddle/nn/initializer/orthogonal.py index 80bd02c2d9adf3..c4bd58169fd20a 100644 --- a/python/paddle/nn/initializer/orthogonal.py +++ b/python/paddle/nn/initializer/orthogonal.py @@ -85,7 +85,9 @@ def __call__(self, var: paddle.Tensor, block: pir.Block | None = None): """ assert not ( isinstance(var, framework.EagerParamBase) and var.is_dist() - ), "Currently, orthogonal initializer not support lazy init for dist param." + ), ( + "Currently, orthogonal initializer not support lazy init for dist param." + ) block = self._check_block(block) assert isinstance( var, (framework.Variable, paddle.pir.Value, pir.core.ParameterMeta) @@ -94,9 +96,9 @@ def __call__(self, var: paddle.Tensor, block: pir.Block | None = None): self._seed = block.program.random_seed shape = var.shape - assert ( - len(shape) >= 2 - ), "Only Tensor with 2 or more dimensions can be initialized by Orthogonal" + assert len(shape) >= 2, ( + "Only Tensor with 2 or more dimensions can be initialized by Orthogonal" + ) row = shape[0] col = 1 diff --git a/python/paddle/nn/initializer/uniform.py b/python/paddle/nn/initializer/uniform.py index 5628095e41bd85..8fa4214b26239e 100644 --- a/python/paddle/nn/initializer/uniform.py +++ b/python/paddle/nn/initializer/uniform.py @@ -86,7 +86,9 @@ def forward( """ assert not ( isinstance(var, framework.EagerParamBase) and var.is_dist() - ), "Currently, uniform initializer not support lazy init for dist param." + ), ( + "Currently, uniform initializer not support lazy init for dist param." + ) block = self._check_block(block) assert isinstance(block, (framework.Block, pir.Block)) diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py index bcd7369092766d..d57d26a887852a 100644 --- a/python/paddle/nn/layer/activation.py +++ b/python/paddle/nn/layer/activation.py @@ -1631,9 +1631,9 @@ def __init__(self, name: str | None = None) -> None: self._name = name def forward(self, x: Tensor) -> Tensor: - assert ( - x.ndim == 3 or x.ndim == 4 - ), f"Softmax2D requires a 3D or 4D tensor as input. Received: {x.ndim}D." + assert x.ndim == 3 or x.ndim == 4, ( + f"Softmax2D requires a 3D or 4D tensor as input. Received: {x.ndim}D." + ) return F.softmax(x, axis=-3, dtype=self._dtype, name=self._name) def extra_repr(self) -> str: diff --git a/python/paddle/nn/layer/container.py b/python/paddle/nn/layer/container.py index b446828372a92c..68d0b70e11bf3e 100644 --- a/python/paddle/nn/layer/container.py +++ b/python/paddle/nn/layer/container.py @@ -631,9 +631,9 @@ def insert(self, index: int, sublayer: Layer) -> None: """ assert isinstance(index, int) and -len( self._sub_layers - ) <= index <= len( - self._sub_layers - ), f"index should be an integer in range [{-len(self)}, {len(self)}]" + ) <= index <= len(self._sub_layers), ( + f"index should be an integer in range [{-len(self)}, {len(self)}]" + ) if index < 0: index += len(self) diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py index 13a89cdce03073..1f9878bf33bdbb 100644 --- a/python/paddle/nn/layer/conv.py +++ b/python/paddle/nn/layer/conv.py @@ -94,9 +94,9 @@ def __init__( data_format: DataLayoutND = "NCHW", ) -> None: super().__init__() - assert ( - weight_attr is not False - ), "weight_attr should not be False in Conv." + assert weight_attr is not False, ( + "weight_attr should not be False in Conv." 
+ ) self._param_attr = weight_attr self._bias_attr = bias_attr self._groups = groups diff --git a/python/paddle/nn/layer/layers.py b/python/paddle/nn/layer/layers.py index c8269fb3b8b785..bfe36b4379aa5c 100644 --- a/python/paddle/nn/layer/layers.py +++ b/python/paddle/nn/layer/layers.py @@ -1701,9 +1701,9 @@ def add_parameter(self, name: str, parameter: Tensor) -> Tensor: self._parameters[name] = None if len(self._loaddict_holder) > 0: - assert ( - parameter.name in self._loaddict_holder - ), f"Parameter not found, Can't not find [ {parameter.name} ] in state_dict" + assert parameter.name in self._loaddict_holder, ( + f"Parameter not found, Can't not find [ {parameter.name} ] in state_dict" + ) parameter.set_value(self._loaddict_holder[parameter.name]) @@ -1814,9 +1814,9 @@ def _remove_if_exist(*dicts): if params is None: raise ValueError("super().__init__() should be called first") if len(self._loaddict_holder) > 0: - assert ( - value.name in self._loaddict_holder - ), f"Parameter not found, Can't not find [ {value.name} ] in state_dict" + assert value.name in self._loaddict_holder, ( + f"Parameter not found, Can't not find [ {value.name} ] in state_dict" + ) value.set_value(self._loaddict_holder[value.name]) @@ -2555,9 +2555,9 @@ def _to_impl( if blocking is None: blocking = True else: - assert isinstance( - blocking, bool - ), "blocking value error, must be the True, False or None" + assert isinstance(blocking, bool), ( + "blocking value error, must be the True, False or None" + ) def transform(t, device, dtype, blocking): if floating_only and (not paddle.is_floating_point(t)): diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 3c43a2b1f81507..b0315dd8936891 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -94,9 +94,9 @@ def __init__( super().__init__() if weight_attr is False or bias_attr is False: - assert ( - weight_attr == bias_attr - ), "weight_attr and bias_attr must be set to False at the same time in InstanceNorm" + assert weight_attr == bias_attr, ( + "weight_attr and bias_attr must be set to False at the same time in InstanceNorm" + ) self._momentum = momentum self._epsilon = epsilon self._weight_attr = weight_attr @@ -1919,9 +1919,9 @@ def __init__( self._dtype = dtype self._weight_shape = list(weight_shape) - assert ( - np.prod(self._weight_shape) > 0 - ), "Any dimension of `weight_shape` cannot be equal to 0." + assert np.prod(self._weight_shape) > 0, ( + "Any dimension of `weight_shape` cannot be equal to 0." 
+ ) assert dim < len(self._weight_shape), ( "The input `dim` should be less than the " "length of `weight_shape`, but received dim=" diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index b1ab61ae27e307..bc4698c5b38504 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -1496,9 +1496,9 @@ def forward( **kwargs: Any, ) -> tuple[Tensor, tuple[Tensor, Tensor]]: if isinstance(initial_states, (list, tuple)): - assert ( - len(initial_states) == 2 - ), "length of initial_states should be 2 when it is a list/tuple" + assert len(initial_states) == 2, ( + "length of initial_states should be 2 when it is a list/tuple" + ) outputs, final_states = birnn( self.cell_fw, diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py index fea23ad97c0cc0..152dc9215e1d21 100644 --- a/python/paddle/nn/layer/transformer.py +++ b/python/paddle/nn/layer/transformer.py @@ -70,9 +70,9 @@ def _convert_param_attr_to_list(param_attr, n): list: A list composed of each including cell's `param_attr`. """ if isinstance(param_attr, (list, tuple)): - assert ( - len(param_attr) == n - ), f"length of param_attr should be {n} when it is a list/tuple" + assert len(param_attr) == n, ( + f"length of param_attr should be {n} when it is a list/tuple" + ) param_attrs = [] for attr in param_attr: if isinstance(attr, bool): @@ -197,12 +197,12 @@ def __init__( ) -> None: super().__init__() - assert ( - embed_dim > 0 - ), f"Expected embed_dim to be greater than 0, but received {embed_dim}" - assert ( - num_heads > 0 - ), f"Expected num_heads to be greater than 0, but received {num_heads}" + assert embed_dim > 0, ( + f"Expected embed_dim to be greater than 0, but received {embed_dim}" + ) + assert num_heads > 0, ( + f"Expected num_heads to be greater than 0, but received {num_heads}" + ) self.embed_dim = embed_dim self.kdim = kdim if kdim is not None else embed_dim @@ -212,9 +212,9 @@ def __init__( self.need_weights = need_weights self.head_dim = embed_dim // num_heads - assert ( - self.head_dim * num_heads == self.embed_dim - ), "embed_dim must be divisible by num_heads" + assert self.head_dim * num_heads == self.embed_dim, ( + "embed_dim must be divisible by num_heads" + ) self.q_proj = Linear( embed_dim, embed_dim, weight_attr, bias_attr=bias_attr @@ -646,12 +646,12 @@ def __init__( super().__init__() - assert ( - d_model > 0 - ), f"Expected d_model to be greater than 0, but received {d_model}" - assert ( - nhead > 0 - ), f"Expected nhead to be greater than 0, but received {nhead}" + assert d_model > 0, ( + f"Expected d_model to be greater than 0, but received {d_model}" + ) + assert nhead > 0, ( + f"Expected nhead to be greater than 0, but received {nhead}" + ) assert dim_feedforward > 0, ( "Expected dim_feedforward to be greater than 0, " f"but received {dim_feedforward}" @@ -1017,12 +1017,12 @@ def __init__( super().__init__() - assert ( - d_model > 0 - ), f"Expected d_model to be greater than 0, but received {d_model}" - assert ( - nhead > 0 - ), f"Expected nhead to be greater than 0, but received {nhead}" + assert d_model > 0, ( + f"Expected d_model to be greater than 0, but received {d_model}" + ) + assert nhead > 0, ( + f"Expected nhead to be greater than 0, but received {nhead}" + ) assert dim_feedforward > 0, ( "Expected dim_feedforward to be greater than 0, " f"but received {dim_feedforward}" @@ -1547,12 +1547,12 @@ def __init__( ) -> None: super().__init__() - assert ( - d_model > 0 - ), f"Expected d_model to be greater than 0, but 
received {d_model}" - assert ( - nhead > 0 - ), f"Expected nhead to be greater than 0, but received {nhead}" + assert d_model > 0, ( + f"Expected d_model to be greater than 0, but received {d_model}" + ) + assert nhead > 0, ( + f"Expected nhead to be greater than 0, but received {nhead}" + ) assert dim_feedforward > 0, ( "Expected dim_feedforward to be greater than 0, " f"but received {dim_feedforward}" diff --git a/python/paddle/nn/quant/format.py b/python/paddle/nn/quant/format.py index 6d48b7c2218772..1a52f47b3cf42d 100644 --- a/python/paddle/nn/quant/format.py +++ b/python/paddle/nn/quant/format.py @@ -36,15 +36,11 @@ def fake_fp8_quant(input, scale, axis=-1, type='e4m3'): if type == 'e4m3': return paddle.cast( (inp * 448 / scale).clip(-448, 448), "float8_e4m3fn" - ).astype( - input.dtype - ) # clip then cast + ).astype(input.dtype) # clip then cast elif type == 'e5m2': return paddle.cast( (inp * 57344 / scale).clip(-57344, 57344), "float8_e5m2" - ).astype( - input.dtype - ) # clip then cast + ).astype(input.dtype) # clip then cast else: raise NotImplementedError("only support e4m3 or e5m2 now") diff --git a/python/paddle/nn/quant/quant_layers.py b/python/paddle/nn/quant/quant_layers.py index e2e13a159ba155..1381e916bf5743 100644 --- a/python/paddle/nn/quant/quant_layers.py +++ b/python/paddle/nn/quant/quant_layers.py @@ -318,9 +318,9 @@ def __init__( quant_on_weight: bool = False, reduce_type: Literal['max'] | None = None, ) -> None: - assert ( - quant_on_weight - ), "Channel_wise only can be used on weight quantization." + assert quant_on_weight, ( + "Channel_wise only can be used on weight quantization." + ) super().__init__() self._quant_bits = quant_bits self._quant_axis = quant_axis @@ -872,12 +872,12 @@ def __init__( ''' ''' - assert ( - weight_quant_layer is None - ), "When quantizing ColumnParallelLinear, weight_quant_layer should be None." - assert ( - act_quant_layer is None - ), "When quantizing ColumnParallelLinear, act_quant_layer should be None." + assert weight_quant_layer is None, ( + "When quantizing ColumnParallelLinear, weight_quant_layer should be None." + ) + assert act_quant_layer is None, ( + "When quantizing ColumnParallelLinear, act_quant_layer should be None." + ) self.weight = layer.weight self.bias = layer.bias @@ -972,12 +972,12 @@ def __init__( act_quant_layer: Literal[None] = None, ) -> None: super().__init__() - assert ( - weight_quant_layer is None - ), "When quantizing RowParallelLinear, weight_quant_layer cannot defined by yourself." - assert ( - act_quant_layer is None - ), "When quantizing RowParallelLinear, act_quant_layer cannot defined by yourself." + assert weight_quant_layer is None, ( + "When quantizing RowParallelLinear, weight_quant_layer cannot defined by yourself." + ) + assert act_quant_layer is None, ( + "When quantizing RowParallelLinear, act_quant_layer cannot defined by yourself." + ) # For Linear self.weight = layer.weight diff --git a/python/paddle/nn/quant/quantized_linear.py b/python/paddle/nn/quant/quantized_linear.py index 61d3897a468fa8..1192d37fc97132 100644 --- a/python/paddle/nn/quant/quantized_linear.py +++ b/python/paddle/nn/quant/quantized_linear.py @@ -106,11 +106,13 @@ def weight_quantize( or arch == 89 or arch == 90 or arch == 92 - ), f"Currently weight_quantize only support SM70/75/80/86/89/90. but got {arch} " + ), ( + f"Currently weight_quantize only support SM70/75/80/86/89/90. 
but got {arch} " + ) - assert ( - group_size == -1 or group_size == 64 or group_size == 128 - ), f"Currently group_size only support -1/64/128. but got {group_size} " + assert group_size == -1 or group_size == 64 or group_size == 128, ( + f"Currently group_size only support -1/64/128. but got {group_size} " + ) if in_dynamic_or_pir_mode(): return _C_ops.weight_quantize(x, algo, arch, group_size) else: @@ -160,9 +162,9 @@ def weight_dequantize( >>> out, scale = weight_quantize(x, algo='weight_only_int8') >>> x_dequant = weight_dequantize(out, scale) """ - assert ( - group_size == -1 or group_size == 64 or group_size == 128 - ), f"Currently group_size only support -1/64/128. but got {group_size} " + assert group_size == -1 or group_size == 64 or group_size == 128, ( + f"Currently group_size only support -1/64/128. but got {group_size} " + ) if in_dynamic_or_pir_mode(): return _C_ops.weight_dequantize(x, scale, algo, group_size) @@ -236,10 +238,12 @@ def weight_only_linear( or arch == 86 or arch == 89 or arch == 90 - ), f"Currently weight_quantize only support SM70/75/80/86/89/90. but got {arch} " - assert ( - group_size == -1 or group_size == 64 or group_size == 128 - ), f"Currently weight_quantize only support group size of -1, 64 or 128. but got {group_size} " + ), ( + f"Currently weight_quantize only support SM70/75/80/86/89/90. but got {arch} " + ) + assert group_size == -1 or group_size == 64 or group_size == 128, ( + f"Currently weight_quantize only support group size of -1, 64 or 128. but got {group_size} " + ) if in_dynamic_or_pir_mode(): out = _C_ops.weight_only_linear( diff --git a/python/paddle/nn/utils/weight_norm_hook.py b/python/paddle/nn/utils/weight_norm_hook.py index 9c75266dfb516f..d1ef94b243a7d4 100644 --- a/python/paddle/nn/utils/weight_norm_hook.py +++ b/python/paddle/nn/utils/weight_norm_hook.py @@ -137,9 +137,9 @@ def apply(layer: Layer, name: str, dim: int) -> WeightNorm: # support dim is negative number, (dim = -1) == (dim = None) weight_dim = len(layer._parameters[name].shape) - assert ( - dim < weight_dim and dim >= -1 * weight_dim - ), "dim must set between [-R, R), R means the dimension of weight." + assert dim < weight_dim and dim >= -1 * weight_dim, ( + "dim must set between [-R, R), R means the dimension of weight." + ) if dim != -1: dim = (dim + weight_dim) % weight_dim
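Note on the pattern applied above, with a small illustrative sketch. The hunks are mechanical: every multi-line assert is reflowed so the condition stays on a single line and the long failure message is wrapped in its own parentheses, and the .pre-commit-config.yaml change uncomments the python/paddle/[k-n].+ path in one hook block while commenting it out in the other, presumably moving those files from one formatter's list to another's (the exact hooks and formatter settings are not shown in this diff). The sketch below only demonstrates the before/after shape of the assert rewrite; the class and function names in it are hypothetical and are not taken from the patched files.

# Minimal sketch of the assert reflow this patch applies (names are hypothetical).

class _FakeTensor:
    # Stand-in object with a .shape attribute, for illustration only.
    shape = (3, 4)


def check_rank(x):
    # Old layout (the removed "-" lines): the condition is wrapped in
    # parentheses and the message trails after the closing parenthesis.
    assert (
        len(x.shape) >= 2
    ), "The rank of x should be greater than or equal to 2."

    # New layout (the added "+" lines): the condition stays on one line and
    # the long message is parenthesized so it can wrap on its own.
    assert len(x.shape) >= 2, (
        "The rank of x should be greater than or equal to 2."
    )


check_rank(_FakeTensor())

Both forms are equivalent at runtime; the rewrite only changes how the statement is wrapped, which is why the hunks touch no logic and no messages.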