diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1f1db341c82de9..4bf26ab77bb746 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -89,7 +89,7 @@ repos: # | python/paddle/j.+ - # | python/paddle/[k-n].+ + | python/paddle/[k-n].+ # | python/paddle/[o-t].+ @@ -145,7 +145,7 @@ repos: | python/paddle/j.+ - | python/paddle/[k-n].+ + # | python/paddle/[k-n].+ | python/paddle/[o-t].+ diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py index c3ddf5f8dd7973..863a2c7e47ea65 100644 --- a/python/paddle/nn/functional/activation.py +++ b/python/paddle/nn/functional/activation.py @@ -603,9 +603,9 @@ def prelu( [-1.25000000, 6. , 7. , -2. ], [ 6. , 7. , 8. , 9. ]]]]) """ - assert ( - len(weight.shape) == 0 or len(weight.shape) == 1 - ), "The dim count of weight shape should be 0 or 1 in prelu()." + assert len(weight.shape) == 0 or len(weight.shape) == 1, ( + "The dim count of weight shape should be 0 or 1 in prelu()." + ) mode = 'all' if len(weight.shape) == 1 and weight.shape[0] > 1: @@ -626,19 +626,19 @@ def prelu( data_format = 'NCHW' if data_format[1] == 'C' else 'NHWC' - assert ( - len(x.shape) > 1 - ), "The dim count of x should be equal or larger than 2 in prelu() when weight shape is not [1]." + assert len(x.shape) > 1, ( + "The dim count of x should be equal or larger than 2 in prelu() when weight shape is not [1]." + ) # NOTE(GuoxiaWang): support NHWC data format if data_format == 'NHWC': - assert ( - weight.shape[0] == x.shape[-1] - ), "The weight size should be equal to x input channel in prelu() when weight shape is not [1]." + assert weight.shape[0] == x.shape[-1], ( + "The weight size should be equal to x input channel in prelu() when weight shape is not [1]." + ) else: - assert ( - weight.shape[0] == x.shape[1] - ), "The weight size should be equal to x input channel in prelu() when weight shape is not [1]." + assert weight.shape[0] == x.shape[1], ( + "The weight size should be equal to x input channel in prelu() when weight shape is not [1]." + ) mode = 'channel' if in_dynamic_or_pir_mode(): diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 02575f0e4fa4cb..7f2e3d0ccbc1c5 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -601,9 +601,9 @@ def _is_list_or_tuple_(data): if isinstance(dim_size, (Variable, paddle.pir.Value)): contain_var = True continue - assert ( - dim_size > 0 - ), "Each dimension size given in out_shape must be greater than 0." + assert dim_size > 0, ( + "Each dimension size given in out_shape must be greater than 0." + ) if contain_var: new_size_tensor = [] @@ -2068,7 +2068,9 @@ def pad( 'replicate', 'constant', 'circular', - ], f"mode should be one of constant, reflect, replicate, circular, but got {mode}." + ], ( + f"mode should be one of constant, reflect, replicate, circular, but got {mode}." 
+ ) x_dim = len(x.shape) if in_dynamic_mode(): @@ -2162,9 +2164,9 @@ def pad( 4: ["NCHW", "NHWC"], 5: ["NCDHW", "NDHWC"], } - assert ( - data_format in supported_format_map[x_dim] - ), f"input tensor dimension is {x_dim}, it's data format should be in {supported_format_map[x_dim]} but got {data_format}" + assert data_format in supported_format_map[x_dim], ( + f"input tensor dimension is {x_dim}, it's data format should be in {supported_format_map[x_dim]} but got {data_format}" + ) unsqueezed_dim = [] @@ -2831,9 +2833,9 @@ def fold( ) assert len(x.shape) == 3, "input should be the format of [N, C, L]" - assert ( - math.prod(x.shape) >= 0 - ), "The number of elements must greater or equal than zero." + assert math.prod(x.shape) >= 0, ( + "The number of elements must greater or equal than zero." + ) def _is_list_or_tuple_(data): return isinstance(data, (list, tuple)) @@ -2841,30 +2843,30 @@ def _is_list_or_tuple_(data): if isinstance(output_sizes, int): output_sizes = [output_sizes, output_sizes] else: - assert _is_list_or_tuple_(output_sizes) and ( - len(output_sizes) == 2 - ), "output_sizes should either be an integer or a list/tuple of two integers" + assert _is_list_or_tuple_(output_sizes) and (len(output_sizes) == 2), ( + "output_sizes should either be an integer or a list/tuple of two integers" + ) if isinstance(kernel_sizes, int): kernel_sizes = [kernel_sizes, kernel_sizes] else: - assert _is_list_or_tuple_(kernel_sizes) and ( - len(kernel_sizes) == 2 - ), "kernel_sizes should either be an integer or a list/tuple of two integers" + assert _is_list_or_tuple_(kernel_sizes) and (len(kernel_sizes) == 2), ( + "kernel_sizes should either be an integer or a list/tuple of two integers" + ) if isinstance(strides, int): strides = [strides, strides] else: - assert _is_list_or_tuple_(strides) and ( - len(strides) == 2 - ), "strides should either be an integer or a list/tuple of two integers" + assert _is_list_or_tuple_(strides) and (len(strides) == 2), ( + "strides should either be an integer or a list/tuple of two integers" + ) if isinstance(dilations, int): dilations = [dilations, dilations] else: - assert _is_list_or_tuple_(dilations) and ( - len(dilations) == 2 - ), "dilations should either be an integer or a list/tuple of two integers" + assert _is_list_or_tuple_(dilations) and (len(dilations) == 2), ( + "dilations should either be an integer or a list/tuple of two integers" + ) if isinstance(paddings, int): paddings = [paddings] * 4 diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 121da930dc3c40..6d6b9bd3bdd531 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -272,9 +272,9 @@ def _conv_nd( attrs={'axis': -1}, ) else: - assert len(x_shape) > len( - y_shape - ), 'The length of pre_bias must greater than the length of bias' + assert len(x_shape) > len(y_shape), ( + 'The length of pre_bias must greater than the length of bias' + ) padding = len(x_shape) - len(y_shape) - channel_dim bias = reshape( bias, [1] * channel_dim + y_shape + [1] * padding @@ -1336,9 +1336,9 @@ def conv2d_transpose( attrs={'axis': -1}, ) else: - assert len(x_shape) > len( - y_shape - ), 'The length of pre_bias must greater than the length of bias' + assert len(x_shape) > len(y_shape), ( + 'The length of pre_bias must greater than the length of bias' + ) padding = len(x_shape) - len(y_shape) - channel_dim bias = reshape( bias, [1] * channel_dim + y_shape + [1] * padding diff --git a/python/paddle/nn/functional/flash_attention.py 
b/python/paddle/nn/functional/flash_attention.py index 4a7ab07cef44e6..c6f2856e228218 100644 --- a/python/paddle/nn/functional/flash_attention.py +++ b/python/paddle/nn/functional/flash_attention.py @@ -508,30 +508,30 @@ def flash_attention( fa_version = paddle.base.framework.get_flags( ["FLAGS_flash_attn_version"] )["FLAGS_flash_attn_version"] - assert ( - in_dynamic_or_pir_mode() or fa_version == 2 - ), "flash attention 3 only support dynamic or pir mode" - assert ( - dropout == 0.0 or fa_version == 2 - ), "flash attention 3 does not support dropout" - assert ( - not return_softmax or fa_version == 2 - ), "flash attention 3 does not support return softmax" - assert ( - fixed_seed_offset is None or fa_version == 2 - ), "flash attention 3 does not support return softmax" - assert ( - rng_name == "" or fa_version == 2 - ), "flash attention 3 does not support setting rng_name" - assert ( - training or fa_version == 2 - ), "flash attention 3 does not support setting training" - assert ( - name is None or fa_version == 2 - ), "flash attention 3 does not support setting name" - assert ( - softmax_scale is None or fa_version == 3 - ), "flash attention 2 does not support setting softmax_scale" + assert in_dynamic_or_pir_mode() or fa_version == 2, ( + "flash attention 3 only support dynamic or pir mode" + ) + assert dropout == 0.0 or fa_version == 2, ( + "flash attention 3 does not support dropout" + ) + assert not return_softmax or fa_version == 2, ( + "flash attention 3 does not support return softmax" + ) + assert fixed_seed_offset is None or fa_version == 2, ( + "flash attention 3 does not support return softmax" + ) + assert rng_name == "" or fa_version == 2, ( + "flash attention 3 does not support setting rng_name" + ) + assert training or fa_version == 2, ( + "flash attention 3 does not support setting training" + ) + assert name is None or fa_version == 2, ( + "flash attention 3 does not support setting name" + ) + assert softmax_scale is None or fa_version == 3, ( + "flash attention 2 does not support setting softmax_scale" + ) if in_dynamic_or_pir_mode(): if fa_version == 2: (result_attention, result_softmax, _, _) = _C_ops.flash_attn( @@ -1142,9 +1142,9 @@ def flash_attn_varlen_func( >>> output = paddle.nn.functional.flash_attention.flash_attention_v3_varlen(q, q, q, cu_seqlens_q, cu_seqlens_q, max_seqlen_q=max_seq_len_q, max_seqlen_k=max_seq_len_q, causal=True) >>> # doctest: -SKIP """ - assert ( - "xpu" not in paddle.get_device() - ), "flash_attn_varlen_func is not supported on xpu" + assert "xpu" not in paddle.get_device(), ( + "flash_attn_varlen_func is not supported on xpu" + ) assert not paddle.get_flags(["FLAGS_cudnn_deterministic"])[ "FLAGS_cudnn_deterministic" @@ -1157,9 +1157,9 @@ def flash_attn_varlen_func( == 3 ), "FLAGS_flash_attn_version is 2, conflicts with flash_attn_varlen_func" - assert ( - in_dynamic_or_pir_mode() - ), "flash_attn_varlen_func only support dynamic or pir mode" + assert in_dynamic_or_pir_mode(), ( + "flash_attn_varlen_func only support dynamic or pir mode" + ) assert qv is None, "flash_attn_varlen_func does not support setting qv" @@ -2203,9 +2203,9 @@ def flashmask_attention( window_size = (window_size, window_size) sq = query.shape[1] bsz = query.shape[0] - assert ( - startend_row_indices is None - ), "can't use window_size with startend_row_indices" + assert startend_row_indices is None, ( + "can't use window_size with startend_row_indices" + ) if causal: startend_row_indices = paddle.arange( window_size[0] + 1, sq + window_size[0] + 1, dtype="int32" 
@@ -2246,24 +2246,26 @@ def flashmask_attention( ) else: - assert ( - startend_row_indices.dtype == paddle.int32 - ), f"startend_row_indices.dtype must be paddle.int32, but got {startend_row_indices.dtype}" - assert ( - len(startend_row_indices.shape) == 4 - ), f"startend_row_indices rank must be 4,but got {startend_row_indices.shape}" - - assert ( - startend_row_indices.shape[0] == key.shape[0] - ), f"startend_row_indices.shape[0] must be equal to batch_size, but got {startend_row_indices.shape[0]} and {key.shape[0]}" - - assert ( - startend_row_indices.shape[2] == key.shape[1] - ), f"startend_row_indices.shape[2] must be equal to seqlen_k, but got {startend_row_indices.shape[2]} and {key.shape[2]}" + assert startend_row_indices.dtype == paddle.int32, ( + f"startend_row_indices.dtype must be paddle.int32, but got {startend_row_indices.dtype}" + ) + assert len(startend_row_indices.shape) == 4, ( + f"startend_row_indices rank must be 4,but got {startend_row_indices.shape}" + ) + + assert startend_row_indices.shape[0] == key.shape[0], ( + f"startend_row_indices.shape[0] must be equal to batch_size, but got {startend_row_indices.shape[0]} and {key.shape[0]}" + ) + + assert startend_row_indices.shape[2] == key.shape[1], ( + f"startend_row_indices.shape[2] must be equal to seqlen_k, but got {startend_row_indices.shape[2]} and {key.shape[2]}" + ) assert startend_row_indices.shape[1] in [ 1, key.shape[2], - ], "startend_row_indices head_num must be equal to 1(broadcast) or head_num_k." + ], ( + "startend_row_indices head_num must be equal to 1(broadcast) or head_num_k." + ) if causal: if startend_row_indices.shape[-1] == 1: @@ -2383,9 +2385,9 @@ def calc_reduced_attention_scores( >>> ) >>> # doctest: -SKIP """ - assert ( - query.stop_gradient and key.stop_gradient - ), 'calc_reduced_attention_scores() is for inference only.' + assert query.stop_gradient and key.stop_gradient, ( + 'calc_reduced_attention_scores() is for inference only.' + ) if in_dynamic_or_pir_mode(): reduced_scores = _C_ops.calc_reduced_attn_scores( diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index 907394d96b4179..b6e484aded5924 100644 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -94,9 +94,9 @@ def dice_loss( """ assert input.dtype in (paddle.float32, paddle.float64) assert label.dtype in (paddle.int32, paddle.int64) - assert ( - len(input.shape) >= 2 - ), "The rank of input should be greater than or equal to 2." + assert len(input.shape) >= 2, ( + "The rank of input should be greater than or equal to 2." + ) assert len(input.shape) == len(label.shape), ( "The rank of input and label should be equal, " f"but received input: {len(input.shape)}, label: {len(label.shape)}." @@ -105,9 +105,9 @@ def dice_loss( "The last dimension of label should be 1, " f"but received {label.shape[-1]}." ) - assert ( - input.shape[:-1] == label.shape[:-1] - ), "All dimensions should be equal except the last one." + assert input.shape[:-1] == label.shape[:-1], ( + "All dimensions should be equal except the last one." 
+ ) label = paddle.squeeze(label, [-1]) label = paddle.nn.functional.one_hot(label, input.shape[-1]) diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index ede06a5a91331b..860915efc1078f 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -704,9 +704,9 @@ def max_pool1d( def _unpool_output_size(x, kernel_size, stride, padding, output_size): - assert output_size is None or isinstance( - output_size, (list, tuple) - ), f"Required output_size is None|list|tuple, but received {output_size}" + assert output_size is None or isinstance(output_size, (list, tuple)), ( + f"Required output_size is None|list|tuple, but received {output_size}" + ) input_size = x.shape default_size = [] for d in range(len(kernel_size)): diff --git a/python/paddle/nn/initializer/bilinear.py b/python/paddle/nn/initializer/bilinear.py index 3ee5814e92115b..7253970871a025 100644 --- a/python/paddle/nn/initializer/bilinear.py +++ b/python/paddle/nn/initializer/bilinear.py @@ -96,7 +96,9 @@ def forward( """ assert not ( isinstance(var, framework.EagerParamBase) and var.is_dist() - ), "Currently, Bilinear initializer not support lazy init for dist param." + ), ( + "Currently, Bilinear initializer not support lazy init for dist param." + ) block = self._check_block(block) if not isinstance(var, (framework.Variable, pir.core.ParameterMeta)): diff --git a/python/paddle/nn/initializer/dirac.py b/python/paddle/nn/initializer/dirac.py index 82b8e511a6eb61..374a0b756df420 100644 --- a/python/paddle/nn/initializer/dirac.py +++ b/python/paddle/nn/initializer/dirac.py @@ -91,9 +91,9 @@ class Dirac(Initializer): """ def __init__(self, groups: int = 1, name: str | None = None) -> None: - assert groups > 0 and isinstance( - groups, int - ), " 'groups' must be a positive integer. " + assert groups > 0 and isinstance(groups, int), ( + " 'groups' must be a positive integer. " + ) super().__init__() self._groups = groups @@ -127,9 +127,9 @@ def __call__( 4, 5, ], "Only Tensor with 3/4/5 dimensions can be initialized by Dirac" - assert ( - var.shape[0] % self._groups - ) == 0, "Tensor 0-dimension must be divisible by groups" + assert (var.shape[0] % self._groups) == 0, ( + "Tensor 0-dimension must be divisible by groups" + ) if framework.in_pir_mode(): if var.dtype != core.DataType.FLOAT32: diff --git a/python/paddle/nn/initializer/kaiming.py b/python/paddle/nn/initializer/kaiming.py index a53f6bcf0340a7..2df53506c32c9b 100644 --- a/python/paddle/nn/initializer/kaiming.py +++ b/python/paddle/nn/initializer/kaiming.py @@ -114,7 +114,9 @@ def forward( """ assert not ( isinstance(var, framework.EagerParamBase) and var.is_dist() - ), "Currently, kaiming initializer not support lazy init for dist param." + ), ( + "Currently, kaiming initializer not support lazy init for dist param." + ) block = self._check_block(block) assert isinstance( var, diff --git a/python/paddle/nn/initializer/lazy_init.py b/python/paddle/nn/initializer/lazy_init.py index a6be4c4d168650..97a4d623145f63 100644 --- a/python/paddle/nn/initializer/lazy_init.py +++ b/python/paddle/nn/initializer/lazy_init.py @@ -44,9 +44,9 @@ def enable(self): """ if self._state: return - assert ( - framework.in_dygraph_mode() - ), "LazyInit.enable() is only available in dygraph mode." + assert framework.in_dygraph_mode(), ( + "LazyInit.enable() is only available in dygraph mode." 
+ ) self._state = True def disable(self): diff --git a/python/paddle/nn/initializer/orthogonal.py b/python/paddle/nn/initializer/orthogonal.py index 80bd02c2d9adf3..c4bd58169fd20a 100644 --- a/python/paddle/nn/initializer/orthogonal.py +++ b/python/paddle/nn/initializer/orthogonal.py @@ -85,7 +85,9 @@ def __call__(self, var: paddle.Tensor, block: pir.Block | None = None): """ assert not ( isinstance(var, framework.EagerParamBase) and var.is_dist() - ), "Currently, orthogonal initializer not support lazy init for dist param." + ), ( + "Currently, orthogonal initializer not support lazy init for dist param." + ) block = self._check_block(block) assert isinstance( var, (framework.Variable, paddle.pir.Value, pir.core.ParameterMeta) @@ -94,9 +96,9 @@ def __call__(self, var: paddle.Tensor, block: pir.Block | None = None): self._seed = block.program.random_seed shape = var.shape - assert ( - len(shape) >= 2 - ), "Only Tensor with 2 or more dimensions can be initialized by Orthogonal" + assert len(shape) >= 2, ( + "Only Tensor with 2 or more dimensions can be initialized by Orthogonal" + ) row = shape[0] col = 1 diff --git a/python/paddle/nn/initializer/uniform.py b/python/paddle/nn/initializer/uniform.py index 5628095e41bd85..8fa4214b26239e 100644 --- a/python/paddle/nn/initializer/uniform.py +++ b/python/paddle/nn/initializer/uniform.py @@ -86,7 +86,9 @@ def forward( """ assert not ( isinstance(var, framework.EagerParamBase) and var.is_dist() - ), "Currently, uniform initializer not support lazy init for dist param." + ), ( + "Currently, uniform initializer not support lazy init for dist param." + ) block = self._check_block(block) assert isinstance(block, (framework.Block, pir.Block)) diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py index bcd7369092766d..d57d26a887852a 100644 --- a/python/paddle/nn/layer/activation.py +++ b/python/paddle/nn/layer/activation.py @@ -1631,9 +1631,9 @@ def __init__(self, name: str | None = None) -> None: self._name = name def forward(self, x: Tensor) -> Tensor: - assert ( - x.ndim == 3 or x.ndim == 4 - ), f"Softmax2D requires a 3D or 4D tensor as input. Received: {x.ndim}D." + assert x.ndim == 3 or x.ndim == 4, ( + f"Softmax2D requires a 3D or 4D tensor as input. Received: {x.ndim}D." + ) return F.softmax(x, axis=-3, dtype=self._dtype, name=self._name) def extra_repr(self) -> str: diff --git a/python/paddle/nn/layer/container.py b/python/paddle/nn/layer/container.py index b446828372a92c..68d0b70e11bf3e 100644 --- a/python/paddle/nn/layer/container.py +++ b/python/paddle/nn/layer/container.py @@ -631,9 +631,9 @@ def insert(self, index: int, sublayer: Layer) -> None: """ assert isinstance(index, int) and -len( self._sub_layers - ) <= index <= len( - self._sub_layers - ), f"index should be an integer in range [{-len(self)}, {len(self)}]" + ) <= index <= len(self._sub_layers), ( + f"index should be an integer in range [{-len(self)}, {len(self)}]" + ) if index < 0: index += len(self) diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py index 13a89cdce03073..1f9878bf33bdbb 100644 --- a/python/paddle/nn/layer/conv.py +++ b/python/paddle/nn/layer/conv.py @@ -94,9 +94,9 @@ def __init__( data_format: DataLayoutND = "NCHW", ) -> None: super().__init__() - assert ( - weight_attr is not False - ), "weight_attr should not be False in Conv." + assert weight_attr is not False, ( + "weight_attr should not be False in Conv." 
+ ) self._param_attr = weight_attr self._bias_attr = bias_attr self._groups = groups diff --git a/python/paddle/nn/layer/layers.py b/python/paddle/nn/layer/layers.py index c8269fb3b8b785..bfe36b4379aa5c 100644 --- a/python/paddle/nn/layer/layers.py +++ b/python/paddle/nn/layer/layers.py @@ -1701,9 +1701,9 @@ def add_parameter(self, name: str, parameter: Tensor) -> Tensor: self._parameters[name] = None if len(self._loaddict_holder) > 0: - assert ( - parameter.name in self._loaddict_holder - ), f"Parameter not found, Can't not find [ {parameter.name} ] in state_dict" + assert parameter.name in self._loaddict_holder, ( + f"Parameter not found, Can't not find [ {parameter.name} ] in state_dict" + ) parameter.set_value(self._loaddict_holder[parameter.name]) @@ -1814,9 +1814,9 @@ def _remove_if_exist(*dicts): if params is None: raise ValueError("super().__init__() should be called first") if len(self._loaddict_holder) > 0: - assert ( - value.name in self._loaddict_holder - ), f"Parameter not found, Can't not find [ {value.name} ] in state_dict" + assert value.name in self._loaddict_holder, ( + f"Parameter not found, Can't not find [ {value.name} ] in state_dict" + ) value.set_value(self._loaddict_holder[value.name]) @@ -2555,9 +2555,9 @@ def _to_impl( if blocking is None: blocking = True else: - assert isinstance( - blocking, bool - ), "blocking value error, must be the True, False or None" + assert isinstance(blocking, bool), ( + "blocking value error, must be the True, False or None" + ) def transform(t, device, dtype, blocking): if floating_only and (not paddle.is_floating_point(t)): diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 3c43a2b1f81507..b0315dd8936891 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -94,9 +94,9 @@ def __init__( super().__init__() if weight_attr is False or bias_attr is False: - assert ( - weight_attr == bias_attr - ), "weight_attr and bias_attr must be set to False at the same time in InstanceNorm" + assert weight_attr == bias_attr, ( + "weight_attr and bias_attr must be set to False at the same time in InstanceNorm" + ) self._momentum = momentum self._epsilon = epsilon self._weight_attr = weight_attr @@ -1919,9 +1919,9 @@ def __init__( self._dtype = dtype self._weight_shape = list(weight_shape) - assert ( - np.prod(self._weight_shape) > 0 - ), "Any dimension of `weight_shape` cannot be equal to 0." + assert np.prod(self._weight_shape) > 0, ( + "Any dimension of `weight_shape` cannot be equal to 0." 
+ ) assert dim < len(self._weight_shape), ( "The input `dim` should be less than the " "length of `weight_shape`, but received dim=" diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index b1ab61ae27e307..bc4698c5b38504 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -1496,9 +1496,9 @@ def forward( **kwargs: Any, ) -> tuple[Tensor, tuple[Tensor, Tensor]]: if isinstance(initial_states, (list, tuple)): - assert ( - len(initial_states) == 2 - ), "length of initial_states should be 2 when it is a list/tuple" + assert len(initial_states) == 2, ( + "length of initial_states should be 2 when it is a list/tuple" + ) outputs, final_states = birnn( self.cell_fw, diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py index fea23ad97c0cc0..152dc9215e1d21 100644 --- a/python/paddle/nn/layer/transformer.py +++ b/python/paddle/nn/layer/transformer.py @@ -70,9 +70,9 @@ def _convert_param_attr_to_list(param_attr, n): list: A list composed of each including cell's `param_attr`. """ if isinstance(param_attr, (list, tuple)): - assert ( - len(param_attr) == n - ), f"length of param_attr should be {n} when it is a list/tuple" + assert len(param_attr) == n, ( + f"length of param_attr should be {n} when it is a list/tuple" + ) param_attrs = [] for attr in param_attr: if isinstance(attr, bool): @@ -197,12 +197,12 @@ def __init__( ) -> None: super().__init__() - assert ( - embed_dim > 0 - ), f"Expected embed_dim to be greater than 0, but received {embed_dim}" - assert ( - num_heads > 0 - ), f"Expected num_heads to be greater than 0, but received {num_heads}" + assert embed_dim > 0, ( + f"Expected embed_dim to be greater than 0, but received {embed_dim}" + ) + assert num_heads > 0, ( + f"Expected num_heads to be greater than 0, but received {num_heads}" + ) self.embed_dim = embed_dim self.kdim = kdim if kdim is not None else embed_dim @@ -212,9 +212,9 @@ def __init__( self.need_weights = need_weights self.head_dim = embed_dim // num_heads - assert ( - self.head_dim * num_heads == self.embed_dim - ), "embed_dim must be divisible by num_heads" + assert self.head_dim * num_heads == self.embed_dim, ( + "embed_dim must be divisible by num_heads" + ) self.q_proj = Linear( embed_dim, embed_dim, weight_attr, bias_attr=bias_attr @@ -646,12 +646,12 @@ def __init__( super().__init__() - assert ( - d_model > 0 - ), f"Expected d_model to be greater than 0, but received {d_model}" - assert ( - nhead > 0 - ), f"Expected nhead to be greater than 0, but received {nhead}" + assert d_model > 0, ( + f"Expected d_model to be greater than 0, but received {d_model}" + ) + assert nhead > 0, ( + f"Expected nhead to be greater than 0, but received {nhead}" + ) assert dim_feedforward > 0, ( "Expected dim_feedforward to be greater than 0, " f"but received {dim_feedforward}" @@ -1017,12 +1017,12 @@ def __init__( super().__init__() - assert ( - d_model > 0 - ), f"Expected d_model to be greater than 0, but received {d_model}" - assert ( - nhead > 0 - ), f"Expected nhead to be greater than 0, but received {nhead}" + assert d_model > 0, ( + f"Expected d_model to be greater than 0, but received {d_model}" + ) + assert nhead > 0, ( + f"Expected nhead to be greater than 0, but received {nhead}" + ) assert dim_feedforward > 0, ( "Expected dim_feedforward to be greater than 0, " f"but received {dim_feedforward}" @@ -1547,12 +1547,12 @@ def __init__( ) -> None: super().__init__() - assert ( - d_model > 0 - ), f"Expected d_model to be greater than 0, but 
received {d_model}" - assert ( - nhead > 0 - ), f"Expected nhead to be greater than 0, but received {nhead}" + assert d_model > 0, ( + f"Expected d_model to be greater than 0, but received {d_model}" + ) + assert nhead > 0, ( + f"Expected nhead to be greater than 0, but received {nhead}" + ) assert dim_feedforward > 0, ( "Expected dim_feedforward to be greater than 0, " f"but received {dim_feedforward}" diff --git a/python/paddle/nn/quant/format.py b/python/paddle/nn/quant/format.py index 6d48b7c2218772..1a52f47b3cf42d 100644 --- a/python/paddle/nn/quant/format.py +++ b/python/paddle/nn/quant/format.py @@ -36,15 +36,11 @@ def fake_fp8_quant(input, scale, axis=-1, type='e4m3'): if type == 'e4m3': return paddle.cast( (inp * 448 / scale).clip(-448, 448), "float8_e4m3fn" - ).astype( - input.dtype - ) # clip then cast + ).astype(input.dtype) # clip then cast elif type == 'e5m2': return paddle.cast( (inp * 57344 / scale).clip(-57344, 57344), "float8_e5m2" - ).astype( - input.dtype - ) # clip then cast + ).astype(input.dtype) # clip then cast else: raise NotImplementedError("only support e4m3 or e5m2 now") diff --git a/python/paddle/nn/quant/quant_layers.py b/python/paddle/nn/quant/quant_layers.py index e2e13a159ba155..1381e916bf5743 100644 --- a/python/paddle/nn/quant/quant_layers.py +++ b/python/paddle/nn/quant/quant_layers.py @@ -318,9 +318,9 @@ def __init__( quant_on_weight: bool = False, reduce_type: Literal['max'] | None = None, ) -> None: - assert ( - quant_on_weight - ), "Channel_wise only can be used on weight quantization." + assert quant_on_weight, ( + "Channel_wise only can be used on weight quantization." + ) super().__init__() self._quant_bits = quant_bits self._quant_axis = quant_axis @@ -872,12 +872,12 @@ def __init__( ''' ''' - assert ( - weight_quant_layer is None - ), "When quantizing ColumnParallelLinear, weight_quant_layer should be None." - assert ( - act_quant_layer is None - ), "When quantizing ColumnParallelLinear, act_quant_layer should be None." + assert weight_quant_layer is None, ( + "When quantizing ColumnParallelLinear, weight_quant_layer should be None." + ) + assert act_quant_layer is None, ( + "When quantizing ColumnParallelLinear, act_quant_layer should be None." + ) self.weight = layer.weight self.bias = layer.bias @@ -972,12 +972,12 @@ def __init__( act_quant_layer: Literal[None] = None, ) -> None: super().__init__() - assert ( - weight_quant_layer is None - ), "When quantizing RowParallelLinear, weight_quant_layer cannot defined by yourself." - assert ( - act_quant_layer is None - ), "When quantizing RowParallelLinear, act_quant_layer cannot defined by yourself." + assert weight_quant_layer is None, ( + "When quantizing RowParallelLinear, weight_quant_layer cannot defined by yourself." + ) + assert act_quant_layer is None, ( + "When quantizing RowParallelLinear, act_quant_layer cannot defined by yourself." + ) # For Linear self.weight = layer.weight diff --git a/python/paddle/nn/quant/quantized_linear.py b/python/paddle/nn/quant/quantized_linear.py index 61d3897a468fa8..1192d37fc97132 100644 --- a/python/paddle/nn/quant/quantized_linear.py +++ b/python/paddle/nn/quant/quantized_linear.py @@ -106,11 +106,13 @@ def weight_quantize( or arch == 89 or arch == 90 or arch == 92 - ), f"Currently weight_quantize only support SM70/75/80/86/89/90. but got {arch} " + ), ( + f"Currently weight_quantize only support SM70/75/80/86/89/90. 
but got {arch} " + ) - assert ( - group_size == -1 or group_size == 64 or group_size == 128 - ), f"Currently group_size only support -1/64/128. but got {group_size} " + assert group_size == -1 or group_size == 64 or group_size == 128, ( + f"Currently group_size only support -1/64/128. but got {group_size} " + ) if in_dynamic_or_pir_mode(): return _C_ops.weight_quantize(x, algo, arch, group_size) else: @@ -160,9 +162,9 @@ def weight_dequantize( >>> out, scale = weight_quantize(x, algo='weight_only_int8') >>> x_dequant = weight_dequantize(out, scale) """ - assert ( - group_size == -1 or group_size == 64 or group_size == 128 - ), f"Currently group_size only support -1/64/128. but got {group_size} " + assert group_size == -1 or group_size == 64 or group_size == 128, ( + f"Currently group_size only support -1/64/128. but got {group_size} " + ) if in_dynamic_or_pir_mode(): return _C_ops.weight_dequantize(x, scale, algo, group_size) @@ -236,10 +238,12 @@ def weight_only_linear( or arch == 86 or arch == 89 or arch == 90 - ), f"Currently weight_quantize only support SM70/75/80/86/89/90. but got {arch} " - assert ( - group_size == -1 or group_size == 64 or group_size == 128 - ), f"Currently weight_quantize only support group size of -1, 64 or 128. but got {group_size} " + ), ( + f"Currently weight_quantize only support SM70/75/80/86/89/90. but got {arch} " + ) + assert group_size == -1 or group_size == 64 or group_size == 128, ( + f"Currently weight_quantize only support group size of -1, 64 or 128. but got {group_size} " + ) if in_dynamic_or_pir_mode(): out = _C_ops.weight_only_linear( diff --git a/python/paddle/nn/utils/weight_norm_hook.py b/python/paddle/nn/utils/weight_norm_hook.py index 9c75266dfb516f..d1ef94b243a7d4 100644 --- a/python/paddle/nn/utils/weight_norm_hook.py +++ b/python/paddle/nn/utils/weight_norm_hook.py @@ -137,9 +137,9 @@ def apply(layer: Layer, name: str, dim: int) -> WeightNorm: # support dim is negative number, (dim = -1) == (dim = None) weight_dim = len(layer._parameters[name].shape) - assert ( - dim < weight_dim and dim >= -1 * weight_dim - ), "dim must set between [-R, R), R means the dimension of weight." + assert dim < weight_dim and dim >= -1 * weight_dim, ( + "dim must set between [-R, R), R means the dimension of weight." + ) if dim != -1: dim = (dim + weight_dim) % weight_dim
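Note on the pattern applied above, with a small illustrative sketch. The hunks are mechanical: every multi-line assert is reflowed so the condition stays on a single line and the long failure message is wrapped in its own parentheses, and the .pre-commit-config.yaml change uncomments the python/paddle/[k-n].+ path in one hook block while commenting it out in the other, presumably moving those files from one formatter's list to another's (the exact hooks and formatter settings are not shown in this diff). The sketch below only demonstrates the before/after shape of the assert rewrite; the class and function names in it are hypothetical and are not taken from the patched files.

# Minimal sketch of the assert reflow this patch applies (names are hypothetical).

class _FakeTensor:
    # Stand-in object with a .shape attribute, for illustration only.
    shape = (3, 4)


def check_rank(x):
    # Old layout (the removed "-" lines): the condition is wrapped in
    # parentheses and the message trails after the closing parenthesis.
    assert (
        len(x.shape) >= 2
    ), "The rank of x should be greater than or equal to 2."

    # New layout (the added "+" lines): the condition stays on one line and
    # the long message is parenthesized so it can wrap on its own.
    assert len(x.shape) >= 2, (
        "The rank of x should be greater than or equal to 2."
    )


check_rank(_FakeTensor())

Both forms are equivalent at runtime; the rewrite only changes how the statement is wrapped, which is why the hunks touch no logic and no messages.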