remove dropout from fluid (PaddlePaddle#48319)
* remove dropout
ccrrong authored Dec 14, 2022
1 parent 75ffc0f commit 3ccc7f5
Showing 37 changed files with 211 additions and 311 deletions.
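
The change is mechanical throughout: every call to the deprecated fluid.layers.dropout is rewritten against paddle.nn.functional.dropout, with dropout_prob renamed to p and dropout_implementation renamed to mode, while the per-op seed and is_test arguments are dropped. A minimal before/after sketch of the pattern (the tensor x and the probability 0.5 are illustrative, not taken from the diff):

    import paddle
    import paddle.nn.functional as F

    x = paddle.rand([4, 32])

    # Before this commit (deprecated since 2.0.0):
    #   out = fluid.layers.dropout(
    #       x, dropout_prob=0.5, dropout_implementation='upscale_in_train'
    #   )

    # After: dropout_prob -> p, dropout_implementation -> mode
    out = F.dropout(x, p=0.5, mode='upscale_in_train')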
10 changes: 5 additions & 5 deletions python/paddle/fluid/contrib/layers/rnn_impl.py
@@ -378,9 +378,9 @@ def get_single_direction_output(

             step_input = new_hidden
             if dropout_prob is not None and dropout_prob > 0.0:
-                step_input = layers.dropout(
+                step_input = paddle.nn.functional.dropout(
                     step_input,
-                    dropout_prob=dropout_prob,
+                    p=dropout_prob,
                 )

             rnn.step_output(step_input)
@@ -680,10 +680,10 @@ def get_single_direction_output(

             step_input = new_hidden
             if dropout_prob is not None and dropout_prob > 0.0:
-                step_input = layers.dropout(
+                step_input = paddle.nn.functional.dropout(
                     step_input,
-                    dropout_prob=dropout_prob,
-                    dropout_implementation='upscale_in_train',
+                    p=dropout_prob,
+                    mode='upscale_in_train',
                 )

             rnn.step_output(step_input)

@@ -93,10 +93,10 @@ def conv_block(input, num_filter, groups, dropouts):
     conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
     conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])

-    drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
+    drop = paddle.nn.functional.dropout(x=conv5, p=0.5)
     fc1 = fluid.layers.fc(input=drop, size=4096, act=None)
     bn = paddle.static.nn.batch_norm(input=fc1, act='relu')
-    drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
+    drop2 = paddle.nn.functional.dropout(x=bn, p=0.5)
     fc2 = fluid.layers.fc(input=drop2, size=4096, act=None)
     return fc2

134 changes: 0 additions & 134 deletions python/paddle/fluid/layers/nn.py
@@ -66,7 +66,6 @@
     'fc',
     'embedding',
     'conv2d',
-    'dropout',
     'split',
     'l2_normalize',
     'row_conv',
@@ -750,139 +749,6 @@ def _pull_box_sparse(
     return outs


-@deprecated(since="2.0.0", update_to="paddle.nn.functional.dropout")
-def dropout(
-    x,
-    dropout_prob,
-    is_test=None,
-    seed=None,
-    name=None,
-    dropout_implementation="downgrade_in_infer",
-):
-    """
-    Computes dropout.
-
-    Drop or keep each element of `x` independently. Dropout is a regularization
-    technique for reducing overfitting by preventing neuron co-adaption during
-    training. The dropout operator randomly sets (according to the given dropout
-    probability) the outputs of some units to zero, while others remain
-    unchanged.
-
-    The dropout op can be removed from the program to make the program more efficient.
-
-    Args:
-        x (Variable): The input tensor variable. The data type is float16, float32 or float64.
-        dropout_prob (float): Probability of setting units to zero.
-        is_test (bool): A flag indicating whether it is in the test phase or not.
-            Default None; in dynamic graph, it uses the global tracer mode; in static graph, it means False.
-        seed (int): A Python integer used to create random seeds. If this
-            parameter is set to None, a random seed is used.
-            NOTE: If an integer seed is given, always the same output
-            units will be dropped. DO NOT use a fixed seed in training. Default: None.
-        name (str|None): A name for this layer (optional). If set None, the layer
-            will be named automatically.
-        dropout_implementation (string): ['downgrade_in_infer' (default) | 'upscale_in_train']
-
-            1. downgrade_in_infer (default), downgrade the outcome at inference:
-
-               - train: out = input * mask
-               - inference: out = input * (1.0 - dropout_prob)
-
-               (mask is a tensor of the same shape as input; its values are 0 or 1,
-               and the ratio of 0 is dropout_prob)
-
-            2. upscale_in_train, upscale the outcome at training time:
-
-               - train: out = input * mask / (1.0 - dropout_prob)
-               - inference: out = input
-
-               (mask is a tensor of the same shape as input; its values are 0 or 1,
-               and the ratio of 0 is dropout_prob)
-
-    Returns:
-        A Variable holding a Tensor representing the dropout; it has the same shape and data type as `x`.
-
-    Examples:
-
-        .. code-block:: python
-
-            import paddle
-            import paddle.fluid as fluid
-
-            paddle.enable_static()
-            x = fluid.data(name="data", shape=[None, 32, 32], dtype="float32")
-            dropped = fluid.layers.dropout(x, dropout_prob=0.5)
-    """
-    if not isinstance(dropout_prob, (float, int, Variable)):
-        raise TypeError(
-            "dropout_prob argument should be a number(int|float) or Variable"
-        )
-    # fast return for p == 0
-    if isinstance(dropout_prob, (int, float)) and dropout_prob == 0:
-        return x
-
-    if _non_static_mode():
-        if (
-            seed is None or seed == 0
-        ) and default_main_program().random_seed != 0:
-            seed = default_main_program().random_seed
-        if is_test is None:
-            is_test = not _dygraph_tracer()._train_mode
-        out, mask = _legacy_C_ops.dropout(
-            x,
-            'dropout_prob',
-            dropout_prob,
-            'is_test',
-            is_test,
-            'fix_seed',
-            seed is not None,
-            'seed',
-            seed if seed is not None else 0,
-            'dropout_implementation',
-            dropout_implementation,
-        )
-        return out
-
-    def get_attrs(prog, dropout_prob, is_test, seed):
-        if (seed is None or seed == 0) and prog.random_seed != 0:
-            seed = prog.random_seed
-        if isinstance(dropout_prob, Variable) and not dropout_prob.shape != [1]:
-            raise TypeError(
-                "Required dropout_prob.shape == [1] if type(dropout_prob) is Variable, but received dropout_prob.shape = {}".format(
-                    dropout_prob.shape
-                )
-            )
-        attrs = {
-            'dropout_prob': dropout_prob,
-            'is_test': is_test,
-            'fix_seed': seed is not None,
-            'seed': seed if seed is not None else 0,
-            'dropout_implementation': dropout_implementation,
-        }
-        return attrs
-
-    helper = LayerHelper('dropout', **locals())
-    check_variable_and_dtype(
-        x, 'x', ['float16', 'float32', 'float64'], 'dropout'
-    )
-
-    out = helper.create_variable_for_type_inference(dtype=x.dtype)
-    mask = helper.create_variable_for_type_inference(
-        dtype=core.VarDesc.VarType.UINT8, stop_gradient=True
-    )
-
-    attrs = get_attrs(helper.main_program, dropout_prob, is_test, seed)
-
-    helper.append_op(
-        type='dropout',
-        inputs={'X': [x]},
-        outputs={'Out': [out], 'Mask': [mask]},
-        attrs=attrs,
-    )
-    return out


 def conv2d(
     input,
     num_filters,
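The docstring of the deleted function above defines the two dropout_implementation modes, and the arithmetic is easy to check by hand. A small NumPy sketch of the stated formulas (the input values and the mask are illustrative):

    import numpy as np

    p = 0.5                                # dropout_prob
    x = np.array([2.0, 4.0, 6.0, 8.0])
    mask = np.array([1.0, 0.0, 1.0, 0.0])  # 0/1 mask; zeros occur with ratio p

    # downgrade_in_infer (default): leave training output alone, shrink at inference
    train_down = x * mask                  # train:     out = input * mask
    infer_down = x * (1.0 - p)             # inference: out = input * (1.0 - p)

    # upscale_in_train: rescale during training, identity at inference
    train_up = x * mask / (1.0 - p)        # train:     out = input * mask / (1.0 - p)
    infer_up = x                           # inference: out = input

    # In both modes the expected training output equals the inference output,
    # so the two implementations agree in expectation and differ only in
    # where the 1/(1 - p) rescaling is applied.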
6 changes: 2 additions & 4 deletions python/paddle/fluid/nets.py
@@ -260,7 +260,7 @@ def __extend_list__(obj):
         tmp = paddle.static.nn.batch_norm(input=tmp, act=conv_act)
         drop_rate = conv_batchnorm_drop_rate[i]
         if abs(drop_rate) > 1e-5:
-            tmp = layers.dropout(x=tmp, dropout_prob=drop_rate)
+            tmp = paddle.nn.functional.dropout(x=tmp, p=drop_rate)

     if pool_type == 'max':
         pool_out = paddle.nn.functional.max_pool2d(
@@ -637,8 +637,6 @@ def __combine_heads(x):
         weights = paddle.reshape(x=x, shape=product.shape)

         if dropout_rate:
-            weights = layers.dropout(
-                weights, dropout_prob=dropout_rate, is_test=False
-            )
+            weights = paddle.nn.functional.dropout(weights, p=dropout_rate)
         ctx_multiheads = paddle.matmul(weights, v)
         return __combine_heads(ctx_multiheads)
4 changes: 2 additions & 2 deletions python/paddle/fluid/tests/book/test_image_classification.py
@@ -92,10 +92,10 @@ def conv_block(input, num_filter, groups, dropouts):
     conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
     conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])

-    drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
+    drop = paddle.nn.functional.dropout(x=conv5, p=0.5)
     fc1 = fluid.layers.fc(input=drop, size=4096, act=None)
     bn = paddle.static.nn.batch_norm(input=fc1, act='relu')
-    drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
+    drop2 = paddle.nn.functional.dropout(x=bn, p=0.5)
     fc2 = fluid.layers.fc(input=drop2, size=4096, act=None)
     return fc2


@@ -257,11 +257,9 @@ def forward(self, prev_out, out, process_cmd, dropout_rate=0.0):
                 out = self._layer_norm(out)
             elif cmd == "d":  # add dropout
                 if dropout_rate:
-                    out = fluid.layers.dropout(
+                    out = paddle.nn.functional.dropout(
                         out,
-                        dropout_prob=dropout_rate,
-                        seed=ModelHyperParams.dropout_seed,
-                        is_test=False,
+                        p=dropout_rate,
                     )
         return out

@@ -276,11 +274,9 @@ def __init__(self, d_inner_hid, d_hid, dropout_rate):
     def forward(self, x):
         hidden = self._i2h(x)
         if self._dropout_rate:
-            hidden = fluid.layers.dropout(
+            hidden = paddle.nn.functional.dropout(
                 hidden,
-                dropout_prob=self._dropout_rate,
-                seed=ModelHyperParams.dropout_seed,
-                is_test=False,
+                p=self._dropout_rate,
             )
         out = self._h2o(hidden)
         return out
@@ -352,11 +348,9 @@ def forward(self, queries, keys, values, attn_bias):
             product += attn_bias
         weights = paddle.nn.functional.softmax(product)
         if self._dropout_rate:
-            weights_droped = fluid.layers.dropout(
+            weights_droped = paddle.nn.functional.dropout(
                 weights,
-                dropout_prob=self._dropout_rate,
-                seed=ModelHyperParams.dropout_seed,
-                is_test=False,
+                p=self._dropout_rate,
             )
             out = paddle.matmul(weights_droped, transpose_v)
         else:
@@ -548,11 +542,9 @@ def forward(self, src_word, src_pos):
         src_pos_emb.stop_gradient = True
         enc_input = src_word_emb + src_pos_emb
         return (
-            fluid.layers.dropout(
+            paddle.nn.functional.dropout(
                 enc_input,
-                dropout_prob=self._dropout_rate,
-                seed=ModelHyperParams.dropout_seed,
-                is_test=False,
+                p=self._dropout_rate,
             )
             if self._dropout_rate
             else enc_input
3 changes: 2 additions & 1 deletion python/paddle/fluid/tests/unittests/dist_se_resnext.py
@@ -113,7 +113,8 @@ def net(self, input, class_dim=1000):
         )

         pool = paddle.nn.functional.adaptive_avg_pool2d(x=conv, output_size=1)
-        drop = fluid.layers.dropout(x=pool, dropout_prob=0.2)
+        drop = paddle.nn.functional.dropout(x=pool, p=0.2)
+
         stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0)
         out = fluid.layers.fc(
             input=drop,
18 changes: 6 additions & 12 deletions python/paddle/fluid/tests/unittests/dist_transformer.py
@@ -1179,11 +1179,9 @@ def scaled_dot_product_attention(q, k, v, attn_bias, d_model, dropout_rate):
         product += attn_bias
     weights = paddle.nn.functional.softmax(product)
     if dropout_rate:
-        weights = layers.dropout(
+        weights = paddle.nn.functional.dropout(
             weights,
-            dropout_prob=dropout_rate,
-            seed=ModelHyperParams.dropout_seed,
-            is_test=False,
+            p=dropout_rate,
         )
     out = paddle.matmul(weights, v)
     return out
@@ -1258,11 +1256,9 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.0):
             )
         elif cmd == "d":  # add dropout
             if dropout_rate:
-                out = layers.dropout(
+                out = paddle.nn.functional.dropout(
                     out,
-                    dropout_prob=dropout_rate,
-                    seed=ModelHyperParams.dropout_seed,
-                    is_test=False,
+                    p=dropout_rate,
                 )
     return out

@@ -1318,11 +1314,9 @@ def prepare_encoder(
     src_pos_enc.stop_gradient = True
     enc_input = src_word_emb + src_pos_enc
     return (
-        layers.dropout(
+        paddle.nn.functional.dropout(
             enc_input,
-            dropout_prob=dropout_rate,
-            seed=ModelHyperParams.dropout_seed,
-            is_test=False,
+            p=dropout_rate,
         )
         if dropout_rate
         else enc_input
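Note what the transformer rewrites above silently drop: the old calls pinned randomness per op via seed=ModelHyperParams.dropout_seed and forced training behavior with is_test=False. paddle.nn.functional.dropout exposes neither; a test that needs determinism would instead seed the global generator, roughly like this sketch (the seed value is illustrative):

    import paddle

    paddle.seed(2022)  # global seed stands in for the removed per-op seed=... argument
    x = paddle.rand([4, 8])

    # training=True corresponds to the removed is_test=False (and is the default)
    out = paddle.nn.functional.dropout(x, p=0.1, training=True)
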
@@ -56,9 +56,7 @@ def __init__(

     def forward(self, x):
         hidden = self._i2h(x)
         if self._dropout_rate:
-            hidden = fluid.layers.dropout(
-                hidden, dropout_prob=self._dropout_rate, is_test=False
-            )
+            hidden = paddle.nn.functional.dropout(hidden, p=self._dropout_rate)
         out = self._h2o(hidden)
         return out
