diff --git a/python/paddle/autograd/py_layer.py b/python/paddle/autograd/py_layer.py index d2dd31f08dcac..fa9243804faf0 100644 --- a/python/paddle/autograd/py_layer.py +++ b/python/paddle/autograd/py_layer.py @@ -33,23 +33,23 @@ class PyLayerContext: Examples: .. code-block:: python - import paddle - from paddle.autograd import PyLayer - - class cus_tanh(PyLayer): - @staticmethod - def forward(ctx, x): - # ctx is a object of PyLayerContext. - y = paddle.tanh(x) - ctx.save_for_backward(y) - return y - - @staticmethod - def backward(ctx, dy): - # ctx is a object of PyLayerContext. - y, = ctx.saved_tensor() - grad = dy * (1 - paddle.square(y)) - return grad + >>> import paddle + >>> from paddle.autograd import PyLayer + + >>> class cus_tanh(PyLayer): + ... @staticmethod + ... def forward(ctx, x): + ... # ctx is a object of PyLayerContext. + ... y = paddle.tanh(x) + ... ctx.save_for_backward(y) + ... return y + ... + ... @staticmethod + ... def backward(ctx, dy): + ... # ctx is a object of PyLayerContext. + ... y, = ctx.saved_tensor() + ... grad = dy * (1 - paddle.square(y)) + ... return grad """ def save_for_backward(self, *tensors): @@ -68,24 +68,24 @@ def save_for_backward(self, *tensors): Examples: .. code-block:: python - import paddle - from paddle.autograd import PyLayer - - class cus_tanh(PyLayer): - @staticmethod - def forward(ctx, x): - # ctx is a context object that store some objects for backward. - y = paddle.tanh(x) - # Pass tensors to backward. - ctx.save_for_backward(y) - return y - - @staticmethod - def backward(ctx, dy): - # Get the tensors passed by forward. - y, = ctx.saved_tensor() - grad = dy * (1 - paddle.square(y)) - return grad + >>> import paddle + >>> from paddle.autograd import PyLayer + + >>> class cus_tanh(PyLayer): + ... @staticmethod + ... def forward(ctx, x): + ... # ctx is a context object that store some objects for backward. + ... y = paddle.tanh(x) + ... # Pass tensors to backward. + ... ctx.save_for_backward(y) + ... return y + ... + ... @staticmethod + ... def backward(ctx, dy): + ... # Get the tensors passed by forward. + ... y, = ctx.saved_tensor() + ... grad = dy * (1 - paddle.square(y)) + ... return grad """ self.container = tensors @@ -101,24 +101,24 @@ def saved_tensor(self): Examples: .. code-block:: python - import paddle - from paddle.autograd import PyLayer - - class cus_tanh(PyLayer): - @staticmethod - def forward(ctx, x): - # ctx is a context object that store some objects for backward. - y = paddle.tanh(x) - # Pass tensors to backward. - ctx.save_for_backward(y) - return y - - @staticmethod - def backward(ctx, dy): - # Get the tensors passed by forward. - y, = ctx.saved_tensor() - grad = dy * (1 - paddle.square(y)) - return grad + >>> import paddle + >>> from paddle.autograd import PyLayer + + >>> class cus_tanh(PyLayer): + ... @staticmethod + ... def forward(ctx, x): + ... # ctx is a context object that store some objects for backward. + ... y = paddle.tanh(x) + ... # Pass tensors to backward. + ... ctx.save_for_backward(y) + ... return y + ... + ... @staticmethod + ... def backward(ctx, dy): + ... # Get the tensors passed by forward. + ... y, = ctx.saved_tensor() + ... grad = dy * (1 - paddle.square(y)) + ... return grad """ return self.container @@ -135,30 +135,31 @@ def mark_not_inplace(self, *args): Examples: .. 
code-block:: python - import paddle - - class Exp(paddle.autograd.PyLayer): - @staticmethod - def forward(ctx, x): - ctx.mark_not_inplace(x) - return x - - @staticmethod - def backward(ctx, grad_output): - out = grad_output.exp() - return out - - x = paddle.randn((1, 1)) - x.stop_gradient = False - attn_layers = [] - for idx in range(0, 2): - attn_layers.append(Exp()) - - for step in range(0, 2): - a = x - for j in range(0,2): - a = attn_layers[j].apply(x) - a.backward() + >>> import paddle + + >>> class Exp(paddle.autograd.PyLayer): + ... @staticmethod + ... def forward(ctx, x): + ... ctx.mark_not_inplace(x) + ... return x + ... + ... @staticmethod + ... def backward(ctx, grad_output): + ... out = grad_output.exp() + ... return out + + >>> paddle.seed(2023) + >>> x = paddle.randn((1, 1)) + >>> x.stop_gradient = False + >>> attn_layers = [] + >>> for idx in range(0, 2): + ... attn_layers.append(Exp()) + + >>> for step in range(0, 2): + ... a = x + ... for j in range(0,2): + ... a = attn_layers[j].apply(x) + ... a.backward() """ self.not_inplace_tensors = args @@ -177,28 +178,28 @@ def mark_non_differentiable(self, *args): Examples: .. code-block:: python - import paddle - from paddle.autograd import PyLayer - import numpy as np - - class Tanh(PyLayer): - @staticmethod - def forward(ctx, x): - a = x + x - b = x + x + x - ctx.mark_non_differentiable(a) - return a, b - - @staticmethod - def backward(ctx, grad_a, grad_b): - assert np.equal(grad_a.numpy(), paddle.zeros([1]).numpy()) - assert np.equal(grad_b.numpy(), paddle.ones([1], dtype="float64").numpy()) - return grad_b - - x = paddle.ones([1], dtype="float64") - x.stop_gradient = False - a, b = Tanh.apply(x) - b.sum().backward() + >>> import paddle + >>> from paddle.autograd import PyLayer + >>> import numpy as np + + >>> class Tanh(PyLayer): + ... @staticmethod + ... def forward(ctx, x): + ... a = x + x + ... b = x + x + x + ... ctx.mark_non_differentiable(a) + ... return a, b + ... + ... @staticmethod + ... def backward(ctx, grad_a, grad_b): + ... assert np.equal(grad_a.numpy(), paddle.zeros([1]).numpy()) + ... assert np.equal(grad_b.numpy(), paddle.ones([1], dtype="float64").numpy()) + ... return grad_b + + >>> x = paddle.ones([1], dtype="float64") + >>> x.stop_gradient = False + >>> a, b = Tanh.apply(x) + >>> b.sum().backward() """ self.non_differentiable = args @@ -216,38 +217,38 @@ def set_materialize_grads(self, value: bool): Examples: .. code-block:: python - import paddle - from paddle.autograd import PyLayer - import numpy as np - - class Tanh(PyLayer): - @staticmethod - def forward(ctx, x): - return x+x+x, x+x - - @staticmethod - def backward(ctx, grad, grad2): - assert np.equal(grad2.numpy(), paddle.zeros([1]).numpy()) - return grad - - class Tanh2(PyLayer): - @staticmethod - def forward(ctx, x): - ctx.set_materialize_grads(False) - return x+x+x, x+x - - @staticmethod - def backward(ctx, grad, grad2): - assert grad2==None - return grad - - x = paddle.ones([1], dtype="float64") - x.stop_gradient = False - Tanh.apply(x)[0].backward() - - x2 = paddle.ones([1], dtype="float64") - x2.stop_gradient = False - Tanh2.apply(x2)[0].backward() + >>> import paddle + >>> from paddle.autograd import PyLayer + >>> import numpy as np + + >>> class Tanh(PyLayer): + ... @staticmethod + ... def forward(ctx, x): + ... return x+x+x, x+x + ... + ... @staticmethod + ... def backward(ctx, grad, grad2): + ... assert np.equal(grad2.numpy(), paddle.zeros([1]).numpy()) + ... return grad + + >>> class Tanh2(PyLayer): + ... @staticmethod + ... 
def forward(ctx, x): + ... ctx.set_materialize_grads(False) + ... return x+x+x, x+x + ... + ... @staticmethod + ... def backward(ctx, grad, grad2): + ... assert grad2==None + ... return grad + + >>> x = paddle.ones([1], dtype="float64") + >>> x.stop_gradient = False + >>> Tanh.apply(x)[0].backward() + + >>> x2 = paddle.ones([1], dtype="float64") + >>> x2.stop_gradient = False + >>> Tanh2.apply(x2)[0].backward() """ self.materialize_grads = value @@ -290,30 +291,34 @@ class PyLayer(with_mateclass(PyLayerMeta, core.eager.PyLayer, PyLayerContext)): Examples: .. code-block:: python - import paddle - from paddle.autograd import PyLayer - - class cus_tanh(PyLayer): - @staticmethod - def forward(ctx, x): - y = paddle.tanh(x) - # Pass tensors to backward. - ctx.save_for_backward(y) - return y - - @staticmethod - def backward(ctx, dy): - # Get the tensors passed by forward. - y, = ctx.saved_tensor() - grad = dy * (1 - paddle.square(y)) - return grad - - data = paddle.randn([2, 3], dtype="float64") - data.stop_gradient = False - z = cus_tanh.apply(data) - z.mean().backward() - - print(data.grad) + >>> import paddle + >>> from paddle.autograd import PyLayer + + >>> class cus_tanh(PyLayer): + ... @staticmethod + ... def forward(ctx, x): + ... y = paddle.tanh(x) + ... # Pass tensors to backward. + ... ctx.save_for_backward(y) + ... return y + ... + ... @staticmethod + ... def backward(ctx, dy): + ... # Get the tensors passed by forward. + ... y, = ctx.saved_tensor() + ... grad = dy * (1 - paddle.square(y)) + ... return grad + + >>> paddle.seed(2023) + >>> data = paddle.randn([2, 3], dtype="float64") + >>> data.stop_gradient = False + >>> z = cus_tanh.apply(data) + >>> z.mean().backward() + + >>> print(data.grad) + Tensor(shape=[2, 3], dtype=float64, place=Place(cpu), stop_gradient=True, + [[0.16604150, 0.05858341, 0.14051214], + [0.15677770, 0.01564609, 0.02991660]]) """ @staticmethod @@ -333,23 +338,23 @@ def forward(ctx, *args, **kwargs): Examples: .. code-block:: python - import paddle - from paddle.autograd import PyLayer - - class cus_tanh(PyLayer): - @staticmethod - def forward(ctx, x): - y = paddle.tanh(x) - # Pass tensors to backward. - ctx.save_for_backward(y) - return y - - @staticmethod - def backward(ctx, dy): - # Get the tensors passed by forward. - y, = ctx.saved_tensor() - grad = dy * (1 - paddle.square(y)) - return grad + >>> import paddle + >>> from paddle.autograd import PyLayer + + >>> class cus_tanh(PyLayer): + ... @staticmethod + ... def forward(ctx, x): + ... y = paddle.tanh(x) + ... # Pass tensors to backward. + ... ctx.save_for_backward(y) + ... return y + ... + ... @staticmethod + ... def backward(ctx, dy): + ... # Get the tensors passed by forward. + ... y, = ctx.saved_tensor() + ... grad = dy * (1 - paddle.square(y)) + ... return grad """ raise NotImplementedError( "You must implement the forward function for PyLayer." @@ -373,23 +378,23 @@ def backward(ctx, *args): Examples: .. code-block:: python - import paddle - from paddle.autograd import PyLayer - - class cus_tanh(PyLayer): - @staticmethod - def forward(ctx, x): - y = paddle.tanh(x) - # Pass tensors to backward. - ctx.save_for_backward(y) - return y - - @staticmethod - def backward(ctx, dy): - # Get the tensors passed by forward. - y, = ctx.saved_tensor() - grad = dy * (1 - paddle.square(y)) - return grad + >>> import paddle + >>> from paddle.autograd import PyLayer + + >>> class cus_tanh(PyLayer): + ... @staticmethod + ... def forward(ctx, x): + ... y = paddle.tanh(x) + ... # Pass tensors to backward. + ... 
ctx.save_for_backward(y) + ... return y + ... + ... @staticmethod + ... def backward(ctx, dy): + ... # Get the tensors passed by forward. + ... y, = ctx.saved_tensor() + ... grad = dy * (1 - paddle.square(y)) + ... return grad """ raise NotImplementedError( diff --git a/python/paddle/autograd/saved_tensors_hooks.py b/python/paddle/autograd/saved_tensors_hooks.py index d2be6b5e6bf52..709c646325ed0 100644 --- a/python/paddle/autograd/saved_tensors_hooks.py +++ b/python/paddle/autograd/saved_tensors_hooks.py @@ -45,58 +45,58 @@ class saved_tensors_hooks: Examples: .. code-block:: python - # Example1 - import paddle - - def pack_hook(x): - print("Packing", x) - return x.numpy() - - def unpack_hook(x): - print("UnPacking", x) - return paddle.to_tensor(x) - - a = paddle.ones([3,3]) - b = paddle.ones([3,3]) * 2 - a.stop_gradient = False - b.stop_gradient = False - with paddle.autograd.saved_tensors_hooks(pack_hook, unpack_hook): - y = paddle.multiply(a, b) - y.sum().backward() - - # Example2 - import paddle - from paddle.autograd import PyLayer - - class cus_multiply(PyLayer): - @staticmethod - def forward(ctx, a, b): - y = paddle.multiply(a, b) - ctx.save_for_backward(a, b) - return y - - @staticmethod - def backward(ctx, dy): - a,b = ctx.saved_tensor() - grad_a = dy * a - grad_b = dy * b - return grad_a, grad_b - - def pack_hook(x): - print("Packing", x) - return x.numpy() - - def unpack_hook(x): - print("UnPacking", x) - return paddle.to_tensor(x) - - a = paddle.ones([3,3]) - b = paddle.ones([3,3]) * 2 - a.stop_gradient = False - b.stop_gradient = False - with paddle.autograd.saved_tensors_hooks(pack_hook, unpack_hook): - y = cus_multiply.apply(a, b) - y.sum().backward() + >>> # Example1 + >>> import paddle + + >>> def pack_hook(x): + ... print("Packing", x) + ... return x.numpy() + + >>> def unpack_hook(x): + ... print("UnPacking", x) + ... return paddle.to_tensor(x) + + >>> a = paddle.ones([3,3]) + >>> b = paddle.ones([3,3]) * 2 + >>> a.stop_gradient = False + >>> b.stop_gradient = False + >>> with paddle.autograd.saved_tensors_hooks(pack_hook, unpack_hook): + ... y = paddle.multiply(a, b) + >>> y.sum().backward() + + >>> # Example2 + >>> import paddle + >>> from paddle.autograd import PyLayer + + >>> class cus_multiply(PyLayer): + ... @staticmethod + ... def forward(ctx, a, b): + ... y = paddle.multiply(a, b) + ... ctx.save_for_backward(a, b) + ... return y + ... + ... @staticmethod + ... def backward(ctx, dy): + ... a,b = ctx.saved_tensor() + ... grad_a = dy * a + ... grad_b = dy * b + ... return grad_a, grad_b + + >>> def pack_hook(x): + ... print("Packing", x) + ... return x.numpy() + + >>> def unpack_hook(x): + ... print("UnPacking", x) + ... return paddle.to_tensor(x) + + >>> a = paddle.ones([3,3]) + >>> b = paddle.ones([3,3]) * 2 + >>> a.stop_gradient = False + >>> b.stop_gradient = False + >>> with paddle.autograd.saved_tensors_hooks(pack_hook, unpack_hook): + ... y = cus_multiply.apply(a, b) + >>> y.sum().backward() """ def __init__(self, pack_hook, unpack_hook): diff --git a/python/paddle/framework/dtype.py b/python/paddle/framework/dtype.py index 42cd074d88f11..6640407084785 100644 --- a/python/paddle/framework/dtype.py +++ b/python/paddle/framework/dtype.py @@ -58,15 +58,19 @@ def iinfo(dtype): Examples: .. 
code-block:: python - import paddle - - iinfo_uint8 = paddle.iinfo(paddle.uint8) - print(iinfo_uint8) - # paddle.iinfo(min=0, max=255, bits=8, dtype=uint8) - print(iinfo_uint8.min) # 0 - print(iinfo_uint8.max) # 255 - print(iinfo_uint8.bits) # 8 - print(iinfo_uint8.dtype) # uint8 + >>> import paddle + + >>> iinfo_uint8 = paddle.iinfo(paddle.uint8) + >>> print(iinfo_uint8) + paddle.iinfo(min=0, max=255, bits=8, dtype=uint8) + >>> print(iinfo_uint8.min) + 0 + >>> print(iinfo_uint8.max) + 255 + >>> print(iinfo_uint8.bits) + 8 + >>> print(iinfo_uint8.dtype) + uint8 """ return core_iinfo(dtype) @@ -98,17 +102,25 @@ def finfo(dtype): Examples: .. code-block:: python - import paddle - - finfo_float32 = paddle.finfo(paddle.float32) - print(finfo_float32.min) # -3.40282e+38 - print(finfo_float32.max) # 3.40282e+38 - print(finfo_float32.eps) # 1.19209e-07 - print(finfo_float32.resolution) # 1e-06 - print(finfo_float32.smallest_normal) # 1.17549e-38 - print(finfo_float32.tiny) # 1.17549e-38 - print(finfo_float32.bits) # 32 - print(finfo_float32.dtype) # float32 + >>> import paddle + + >>> finfo_float32 = paddle.finfo(paddle.float32) + >>> print(finfo_float32.min) + -3.4028234663852886e+38 + >>> print(finfo_float32.max) + 3.4028234663852886e+38 + >>> print(finfo_float32.eps) + 1.1920928955078125e-07 + >>> print(finfo_float32.resolution) + 1e-06 + >>> print(finfo_float32.smallest_normal) + 1.1754943508222875e-38 + >>> print(finfo_float32.tiny) + 1.1754943508222875e-38 + >>> print(finfo_float32.bits) + 32 + >>> print(finfo_float32.dtype) + float32 """ return core_finfo(dtype) diff --git a/python/paddle/framework/framework.py b/python/paddle/framework/framework.py index 563c4b4cd3b20..6f8c7c7555bf0 100644 --- a/python/paddle/framework/framework.py +++ b/python/paddle/framework/framework.py @@ -35,8 +35,8 @@ def set_default_dtype(d): Examples: .. code-block:: python - import paddle - paddle.set_default_dtype("float32") + >>> import paddle + >>> paddle.set_default_dtype("float32") """ if isinstance(d, type): @@ -76,7 +76,7 @@ def get_default_dtype(): Examples: .. code-block:: python - import paddle - paddle.get_default_dtype() + >>> import paddle + >>> paddle.get_default_dtype() """ return LayerHelperBase.get_default_dtype() diff --git a/python/paddle/framework/io.py b/python/paddle/framework/io.py index 2c526afc98e18..cc9ed4768ced9 100644 --- a/python/paddle/framework/io.py +++ b/python/paddle/framework/io.py @@ -677,100 +677,101 @@ def save(obj, path, protocol=4, **configs): .. code-block:: python :name: code-example-1 - # example 1: dynamic graph - import paddle - emb = paddle.nn.Embedding(10, 10) - layer_state_dict = emb.state_dict() - - # save state_dict of emb - paddle.save(layer_state_dict, "emb.pdparams") - - scheduler = paddle.optimizer.lr.NoamDecay( - d_model=0.01, warmup_steps=100, verbose=True) - adam = paddle.optimizer.Adam( - learning_rate=scheduler, - parameters=emb.parameters()) - opt_state_dict = adam.state_dict() - - # save state_dict of optimizer - paddle.save(opt_state_dict, "adam.pdopt") - # save weight of emb - paddle.save(emb.weight, "emb.weight.pdtensor") + >>> # example 1: dynamic graph + >>> import paddle + >>> emb = paddle.nn.Embedding(10, 10) + >>> layer_state_dict = emb.state_dict() + + >>> # save state_dict of emb + >>> paddle.save(layer_state_dict, "emb.pdparams") + + >>> scheduler = paddle.optimizer.lr.NoamDecay( + ... d_model=0.01, warmup_steps=100, verbose=True) + >>> adam = paddle.optimizer.Adam( + ... learning_rate=scheduler, + ... 
parameters=emb.parameters()) + >>> opt_state_dict = adam.state_dict() + + >>> # save state_dict of optimizer + >>> paddle.save(opt_state_dict, "adam.pdopt") + >>> # save weight of emb + >>> paddle.save(emb.weight, "emb.weight.pdtensor") .. code-block:: python :name: code-example-2 - # example 2: Save multiple state_dict at the same time - import paddle - from paddle import nn - from paddle.optimizer import Adam + >>> # example 2: Save multiple state_dict at the same time + >>> import paddle + >>> from paddle import nn + >>> from paddle.optimizer import Adam - layer = paddle.nn.Linear(3, 4) - adam = Adam(learning_rate=0.001, parameters=layer.parameters()) - obj = {'model': layer.state_dict(), 'opt': adam.state_dict(), 'epoch': 100} - path = 'example/model.pdparams' - paddle.save(obj, path) + >>> layer = paddle.nn.Linear(3, 4) + >>> adam = Adam(learning_rate=0.001, parameters=layer.parameters()) + >>> obj = {'model': layer.state_dict(), 'opt': adam.state_dict(), 'epoch': 100} + >>> path = 'example/model.pdparams' + >>> paddle.save(obj, path) .. code-block:: python :name: code-example-3 - # example 3: static graph - import paddle - import paddle.static as static + >>> # example 3: static graph + >>> import paddle + >>> import paddle.static as static - paddle.enable_static() + >>> paddle.enable_static() - # create network - x = paddle.static.data(name="x", shape=[None, 224], dtype='float32') - z = paddle.static.nn.fc(x, 10) + >>> # create network + >>> x = paddle.static.data(name="x", shape=[None, 224], dtype='float32') + >>> z = paddle.static.nn.fc(x, 10) - place = paddle.CPUPlace() - exe = paddle.static.Executor(place) - exe.run(paddle.static.default_startup_program()) - prog = paddle.static.default_main_program() - for var in prog.list_vars(): - if list(var.shape) == [224, 10]: - tensor = var.get_value() - break + >>> place = paddle.CPUPlace() + >>> exe = paddle.static.Executor(place) + >>> exe.run(paddle.static.default_startup_program()) + >>> prog = paddle.static.default_main_program() + >>> for var in prog.list_vars(): + ... if list(var.shape) == [224, 10]: + ... tensor = var.get_value() + ... break - # save/load tensor - path_tensor = 'temp/tensor.pdtensor' - paddle.save(tensor, path_tensor) + >>> # save/load tensor + >>> path_tensor = 'temp/tensor.pdtensor' + >>> paddle.save(tensor, path_tensor) - # save/load state_dict - path_state_dict = 'temp/model.pdparams' - paddle.save(prog.state_dict("param"), path_tensor) + >>> # save/load state_dict + >>> path_state_dict = 'temp/model.pdparams' + >>> paddle.save(prog.state_dict("param"), path_tensor) .. code-block:: python :name: code-example-4 - # example 4: save program - import paddle + >>> # example 4: save program + >>> import paddle - paddle.enable_static() + >>> paddle.enable_static() - data = paddle.static.data( - name='x_static_save', shape=(None, 224), dtype='float32') - y_static = z = paddle.static.nn.fc(data, 10) - main_program = paddle.static.default_main_program() - path = "example/main_program.pdmodel" - paddle.save(main_program, path) + >>> data = paddle.static.data( + ... name='x_static_save', shape=(None, 224), dtype='float32') + >>> y_static = z = paddle.static.nn.fc(data, 10) + >>> main_program = paddle.static.default_main_program() + >>> path = "example/main_program.pdmodel" + >>> paddle.save(main_program, path) .. 
code-block:: python :name: code-example-5 - # example 5: save object to memory - from io import BytesIO - import paddle - from paddle.nn import Linear - paddle.disable_static() + >>> # example 5: save object to memory + >>> from io import BytesIO + >>> import paddle + >>> from paddle.nn import Linear + >>> paddle.disable_static() - linear = Linear(5, 10) - state_dict = linear.state_dict() - byio = BytesIO() - paddle.save(state_dict, byio) - tensor = paddle.randn([2, 3], dtype='float32') - paddle.save(tensor, byio) + >>> linear = Linear(5, 10) + >>> state_dict = linear.state_dict() + >>> byio = BytesIO() + >>> paddle.save(state_dict, byio) + >>> paddle.seed(2023) + >>> tensor = paddle.randn([2, 3], dtype='float32') + >>> paddle.save(tensor, byio) ''' if _is_file_path(path): @@ -938,115 +939,115 @@ def load(path, **configs): .. code-block:: python :name: code-example-1 - # example 1: dynamic graph - import paddle - emb = paddle.nn.Embedding(10, 10) - layer_state_dict = emb.state_dict() - - # save state_dict of emb - paddle.save(layer_state_dict, "emb.pdparams") - - scheduler = paddle.optimizer.lr.NoamDecay( - d_model=0.01, warmup_steps=100, verbose=True) - adam = paddle.optimizer.Adam( - learning_rate=scheduler, - parameters=emb.parameters()) - opt_state_dict = adam.state_dict() - - # save state_dict of optimizer - paddle.save(opt_state_dict, "adam.pdopt") - # save weight of emb - paddle.save(emb.weight, "emb.weight.pdtensor") - - # load state_dict of emb - load_layer_state_dict = paddle.load("emb.pdparams") - # load state_dict of optimizer - load_opt_state_dict = paddle.load("adam.pdopt") - # load weight of emb - load_weight = paddle.load("emb.weight.pdtensor") + >>> # example 1: dynamic graph + >>> import paddle + >>> emb = paddle.nn.Embedding(10, 10) + >>> layer_state_dict = emb.state_dict() + + >>> # save state_dict of emb + >>> paddle.save(layer_state_dict, "emb.pdparams") + + >>> scheduler = paddle.optimizer.lr.NoamDecay( + ... d_model=0.01, warmup_steps=100, verbose=True) + >>> adam = paddle.optimizer.Adam( + ... learning_rate=scheduler, + ... parameters=emb.parameters()) + >>> opt_state_dict = adam.state_dict() + + >>> # save state_dict of optimizer + >>> paddle.save(opt_state_dict, "adam.pdopt") + >>> # save weight of emb + >>> paddle.save(emb.weight, "emb.weight.pdtensor") + + >>> # load state_dict of emb + >>> load_layer_state_dict = paddle.load("emb.pdparams") + >>> # load state_dict of optimizer + >>> load_opt_state_dict = paddle.load("adam.pdopt") + >>> # load weight of emb + >>> load_weight = paddle.load("emb.weight.pdtensor") .. code-block:: python :name: code-example-2 - # example 2: Load multiple state_dict at the same time - import paddle - from paddle import nn - from paddle.optimizer import Adam + >>> # example 2: Load multiple state_dict at the same time + >>> import paddle + >>> from paddle import nn + >>> from paddle.optimizer import Adam - layer = paddle.nn.Linear(3, 4) - adam = Adam(learning_rate=0.001, parameters=layer.parameters()) - obj = {'model': layer.state_dict(), 'opt': adam.state_dict(), 'epoch': 100} - path = 'example/model.pdparams' - paddle.save(obj, path) - obj_load = paddle.load(path) + >>> layer = paddle.nn.Linear(3, 4) + >>> adam = Adam(learning_rate=0.001, parameters=layer.parameters()) + >>> obj = {'model': layer.state_dict(), 'opt': adam.state_dict(), 'epoch': 100} + >>> path = 'example/model.pdparams' + >>> paddle.save(obj, path) + >>> obj_load = paddle.load(path) .. 
code-block:: python :name: code-example-3 - # example 3: static graph - import paddle - import paddle.static as static + >>> # example 3: static graph + >>> import paddle + >>> import paddle.static as static - paddle.enable_static() + >>> paddle.enable_static() - # create network - x = paddle.static.data(name="x", shape=[None, 224], dtype='float32') - z = paddle.static.nn.fc(x, 10) + >>> # create network + >>> x = paddle.static.data(name="x", shape=[None, 224], dtype='float32') + >>> z = paddle.static.nn.fc(x, 10) - place = paddle.CPUPlace() - exe = paddle.static.Executor(place) - exe.run(paddle.static.default_startup_program()) - prog = paddle.static.default_main_program() - for var in prog.list_vars(): - if list(var.shape) == [224, 10]: - tensor = var.get_value() - break + >>> place = paddle.CPUPlace() + >>> exe = paddle.static.Executor(place) + >>> exe.run(paddle.static.default_startup_program()) + >>> prog = paddle.static.default_main_program() + >>> for var in prog.list_vars(): + ... if list(var.shape) == [224, 10]: + ... tensor = var.get_value() + ... break - # save/load tensor - path_tensor = 'temp/tensor.pdtensor' - paddle.save(tensor, path_tensor) - load_tensor = paddle.load(path_tensor) + >>> # save/load tensor + >>> path_tensor = 'temp/tensor.pdtensor' + >>> paddle.save(tensor, path_tensor) + >>> load_tensor = paddle.load(path_tensor) - # save/load state_dict - path_state_dict = 'temp/model.pdparams' - paddle.save(prog.state_dict("param"), path_tensor) - load_state_dict = paddle.load(path_tensor) + >>> # save/load state_dict + >>> path_state_dict = 'temp/model.pdparams' + >>> paddle.save(prog.state_dict("param"), path_tensor) + >>> load_state_dict = paddle.load(path_tensor) .. code-block:: python :name: code-example-4 - # example 4: load program - import paddle + >>> # example 4: load program + >>> import paddle - paddle.enable_static() + >>> paddle.enable_static() - data = paddle.static.data( - name='x_static_save', shape=(None, 224), dtype='float32') - y_static = z = paddle.static.nn.fc(data, 10) - main_program = paddle.static.default_main_program() - path = "example/main_program.pdmodel" - paddle.save(main_program, path) - load_main = paddle.load(path) - print(load_main) + >>> data = paddle.static.data( + ... name='x_static_save', shape=(None, 224), dtype='float32') + >>> y_static = z = paddle.static.nn.fc(data, 10) + >>> main_program = paddle.static.default_main_program() + >>> path = "example/main_program.pdmodel" + >>> paddle.save(main_program, path) + >>> load_main = paddle.load(path) .. 
code-block:: python :name: code-example-5 - # example 5: save object to memory - from io import BytesIO - import paddle - from paddle.nn import Linear - paddle.disable_static() - - linear = Linear(5, 10) - state_dict = linear.state_dict() - byio = BytesIO() - paddle.save(state_dict, byio) - tensor = paddle.randn([2, 3], dtype='float32') - paddle.save(tensor, byio) - byio.seek(0) - # load state_dict - dict_load = paddle.load(byio) + >>> # example 5: save object to memory + >>> from io import BytesIO + >>> import paddle + >>> from paddle.nn import Linear + >>> paddle.disable_static() + + >>> linear = Linear(5, 10) + >>> state_dict = linear.state_dict() + >>> byio = BytesIO() + >>> paddle.save(state_dict, byio) + >>> paddle.seed(2023) + >>> tensor = paddle.randn([2, 3], dtype='float32') + >>> paddle.save(tensor, byio) + >>> byio.seek(0) + >>> # load state_dict + >>> dict_load = paddle.load(byio) ''' diff --git a/python/paddle/framework/io_utils.py b/python/paddle/framework/io_utils.py index b4c78c6748a1b..1c72bc2852d0c 100644 --- a/python/paddle/framework/io_utils.py +++ b/python/paddle/framework/io_utils.py @@ -94,12 +94,13 @@ def is_persistable(var): Examples: .. code-block:: python - import paddle - import paddle.fluid as fluid + >>> # doctest: +SKIP('ValueError: var fc.b not in this block') + >>> import paddle + >>> import paddle.fluid as fluid - paddle.enable_static() - param = fluid.default_main_program().global_block().var('fc.b') - res = fluid.io.is_persistable(param) + >>> paddle.enable_static() + >>> param = fluid.default_main_program().global_block().var('fc.b') + >>> res = fluid.io.is_persistable(param) """ if ( var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH @@ -124,12 +125,13 @@ def is_parameter(var): Examples: .. code-block:: python - import paddle - import paddle.fluid as fluid + >>> # doctest: +SKIP('ValueError: var fc.w not in this block') + >>> import paddle + >>> import paddle.fluid as fluid - paddle.enable_static() - param = fluid.default_main_program().global_block().var('fc.w') - res = fluid.io.is_parameter(param) + >>> paddle.enable_static() + >>> param = fluid.default_main_program().global_block().var('fc.w') + >>> res = fluid.io.is_parameter(param) """ return isinstance(var, Parameter) diff --git a/python/paddle/framework/random.py b/python/paddle/framework/random.py index fff7f5eecd987..9670e79b457bd 100644 --- a/python/paddle/framework/random.py +++ b/python/paddle/framework/random.py @@ -34,8 +34,8 @@ def seed(seed): Examples: .. code-block:: python - import paddle - gen = paddle.seed(102) + >>> import paddle + >>> gen = paddle.seed(102) """ # TODO(zhiqiu): 1. remove program.random_seed when all random-related op upgrade @@ -75,8 +75,8 @@ def get_rng_state(device=None): GeneratorState: object. Examples: .. code-block:: python - import paddle - sts = paddle.get_rng_state() + >>> import paddle + >>> sts = paddle.get_rng_state() """ state_list = [] if device is None: @@ -129,8 +129,8 @@ def get_cuda_rng_state(): Examples: .. code-block:: python - import paddle - sts = paddle.get_cuda_rng_state() + >>> import paddle + >>> sts = paddle.get_cuda_rng_state() """ state_list = [] @@ -158,9 +158,9 @@ def set_rng_state(state_list, device=None): Examples: .. code-block:: python - import paddle - sts = paddle.get_rng_state() - paddle.set_rng_state(sts) + >>> import paddle + >>> sts = paddle.get_rng_state() + >>> paddle.set_rng_state(sts) """ if device is None: @@ -223,9 +223,9 @@ def set_cuda_rng_state(state_list): Examples: .. 
code-block:: python - import paddle - sts = paddle.get_cuda_rng_state() - paddle.set_cuda_rng_state(sts) + >>> import paddle + >>> sts = paddle.get_cuda_rng_state() + >>> paddle.set_cuda_rng_state(sts) """ if core.is_compiled_with_cuda(): diff --git a/python/paddle/nn/initializer/Bilinear.py b/python/paddle/nn/initializer/Bilinear.py index 9782521c959fc..403f8773f15b1 100644 --- a/python/paddle/nn/initializer/Bilinear.py +++ b/python/paddle/nn/initializer/Bilinear.py @@ -36,29 +36,29 @@ class Bilinear(Initializer): .. code-block:: python - import math - - import paddle - import paddle.nn as nn - from paddle.regularizer import L2Decay - - factor = 2 - C = 2 - B = 8 - H = W = 32 - w_attr = paddle.ParamAttr(learning_rate=0., - regularizer=L2Decay(0.), - initializer=nn.initializer.Bilinear()) - data = paddle.rand([B, 3, H, W], dtype='float32') - conv_up = nn.Conv2DTranspose(3, - out_channels=C, - kernel_size=2 * factor - factor % 2, - padding=int( - math.ceil((factor - 1) / 2.)), - stride=factor, - weight_attr=w_attr, - bias_attr=False) - x = conv_up(data) + >>> import math + + >>> import paddle + >>> import paddle.nn as nn + >>> from paddle.regularizer import L2Decay + + >>> factor = 2 + >>> C = 2 + >>> B = 8 + >>> H = W = 32 + >>> w_attr = paddle.ParamAttr(learning_rate=0., + ... regularizer=L2Decay(0.), + ... initializer=nn.initializer.Bilinear()) + >>> data = paddle.rand([B, 3, H, W], dtype='float32') + >>> conv_up = nn.Conv2DTranspose(3, + ... out_channels=C, + ... kernel_size=2 * factor - factor % 2, + ... padding=int( + ... math.ceil((factor - 1) / 2.)), + ... stride=factor, + ... weight_attr=w_attr, + ... bias_attr=False) + >>> x = conv_up(data) Where, `out_channels=C` and `groups=C` means this is channel-wise transposed convolution. The filter shape will be (C, 1, K, K) where K is `kernel_size`, diff --git a/python/paddle/nn/initializer/assign.py b/python/paddle/nn/initializer/assign.py index aaa198ec46942..b85f3e7509fa8 100644 --- a/python/paddle/nn/initializer/assign.py +++ b/python/paddle/nn/initializer/assign.py @@ -153,53 +153,62 @@ class Assign(NumpyArrayInitializer): Examples: .. code-block:: python - import paddle - import numpy as np - - # numpy array - data_1 = paddle.ones(shape=[1, 2], dtype='float32') - weight_attr_1 = paddle.framework.ParamAttr( - name="linear_weight_1", - initializer=paddle.nn.initializer.Assign(np.array([2, 2]))) - bias_attr_1 = paddle.framework.ParamAttr( - name="linear_bias_1", - initializer=paddle.nn.initializer.Assign(np.array([2]))) - linear_1 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_1, bias_attr=bias_attr_1) - # linear_1.weight: [2. 2.] - # linear_1.bias: [2.] - - res_1 = linear_1(data_1) - # res_1: [6.] - - # python list - data_2 = paddle.ones(shape=[1, 2], dtype='float32') - weight_attr_2 = paddle.framework.ParamAttr( - name="linear_weight_2", - initializer=paddle.nn.initializer.Assign([2, 2])) - bias_attr_2 = paddle.framework.ParamAttr( - name="linear_bias_2", - initializer=paddle.nn.initializer.Assign([2])) - linear_2 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_2, bias_attr=bias_attr_2) - # linear_2.weight: [2. 2.] - # linear_2.bias: [2.] - - res_2 = linear_2(data_2) - # res_2: [6.] 
- - # tensor - data_3 = paddle.ones(shape=[1, 2], dtype='float32') - weight_attr_3 = paddle.framework.ParamAttr( - name="linear_weight_3", - initializer=paddle.nn.initializer.Assign(paddle.full([2], 2))) - bias_attr_3 = paddle.framework.ParamAttr( - name="linear_bias_3", - initializer=paddle.nn.initializer.Assign(paddle.full([1], 2))) - linear_3 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_3, bias_attr=bias_attr_3) - # linear_3.weight: [2. 2.] - # linear_3.bias: [2.] - - res_3 = linear_3(data_3) - # res_3: [6.] + >>> import paddle + >>> import numpy as np + + >>> # numpy array + >>> data_1 = paddle.ones(shape=[1, 2], dtype='float32') + >>> weight_attr_1 = paddle.framework.ParamAttr( + ... name="linear_weight_1", + ... initializer=paddle.nn.initializer.Assign(np.array([2, 2]))) + >>> bias_attr_1 = paddle.framework.ParamAttr( + ... name="linear_bias_1", + ... initializer=paddle.nn.initializer.Assign(np.array([2]))) + >>> linear_1 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_1, bias_attr=bias_attr_1) + >>> print(linear_1.weight.numpy()) + [2. 2.] + >>> print(linear_1.bias.numpy()) + [2.] + + >>> res_1 = linear_1(data_1) + >>> print(res_1.numpy()) + [6.] + + >>> # python list + >>> data_2 = paddle.ones(shape=[1, 2], dtype='float32') + >>> weight_attr_2 = paddle.framework.ParamAttr( + ... name="linear_weight_2", + ... initializer=paddle.nn.initializer.Assign([2, 2])) + >>> bias_attr_2 = paddle.framework.ParamAttr( + ... name="linear_bias_2", + ... initializer=paddle.nn.initializer.Assign([2])) + >>> linear_2 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_2, bias_attr=bias_attr_2) + >>> print(linear_2.weight.numpy()) + [2. 2.] + >>> print(linear_2.bias.numpy()) + [2.] + + >>> res_2 = linear_2(data_2) + >>> print(res_2.numpy()) + [6.] + + >>> # tensor + >>> data_3 = paddle.ones(shape=[1, 2], dtype='float32') + >>> weight_attr_3 = paddle.framework.ParamAttr( + ... name="linear_weight_3", + ... initializer=paddle.nn.initializer.Assign(paddle.full([2], 2))) + >>> bias_attr_3 = paddle.framework.ParamAttr( + ... name="linear_bias_3", + ... initializer=paddle.nn.initializer.Assign(paddle.full([1], 2))) + >>> linear_3 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_3, bias_attr=bias_attr_3) + >>> print(linear_3.weight.numpy()) + [2. 2.] + >>> print(linear_3.bias.numpy()) + [2.] + + >>> res_3 = linear_3(data_3) + >>> print(res_3.numpy()) + [6.] """ def __init__(self, value, name=None): diff --git a/python/paddle/nn/initializer/constant.py b/python/paddle/nn/initializer/constant.py index d58aa653cb6e6..355bac8b784e3 100644 --- a/python/paddle/nn/initializer/constant.py +++ b/python/paddle/nn/initializer/constant.py @@ -88,18 +88,20 @@ class Constant(ConstantInitializer): Examples: .. code-block:: python - import paddle - import paddle.nn as nn - - data = paddle.rand([30, 10, 2], dtype='float32') - linear = nn.Linear(2, - 4, - weight_attr=nn.initializer.Constant(value=2.0)) - res = linear(data) - print(linear.weight) - # Tensor(shape=[2, 4], dtype=float32, place=Place(gpu:0), stop_gradient=False, - # [[2., 2., 2., 2.], - # [2., 2., 2., 2.]]) + >>> import paddle + >>> import paddle.nn as nn + + >>> paddle.seed(2023) + >>> data = paddle.rand([30, 10, 2], dtype='float32') + >>> linear = nn.Linear(2, + ... 4, + ... weight_attr=nn.initializer.Constant(value=2.0)) + >>> res = linear(data) + >>> print(linear.weight) + Parameter containing: + Tensor(shape=[2, 4], dtype=float32, place=Place(cpu), stop_gradient=False, + [[2., 2., 2., 2.], + [2., 2., 2., 2.]]) """
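The samples converted above follow standard doctest conventions: `>>>` marks a statement, `...` marks a continuation line, and the unprefixed lines that follow are the expected output. As a minimal sketch of what that convention buys, the snippet below runs one of the converted `paddle.iinfo` samples through Python's built-in `doctest` module. The helper name `_iinfo_example` is illustrative only; Paddle's own sample-code checker (which also understands the `# doctest: +SKIP(...)` directives used in this diff) is not shown here and may apply additional rules.

    import doctest

    def _iinfo_example():
        """Mirrors the converted ``paddle.iinfo`` sample from this diff.

        >>> import paddle
        >>> iinfo_uint8 = paddle.iinfo(paddle.uint8)
        >>> print(iinfo_uint8.min)
        0
        >>> print(iinfo_uint8.max)
        255
        """

    # Each printed value is compared against the literal output line that
    # follows it in the docstring, so stale expected outputs fail loudly
    # instead of silently going out of date.
    doctest.run_docstring_examples(_iinfo_example, {}, verbose=True)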