From 2a6799419ad8ad6f7eca7e791342bc950afc94c2 Mon Sep 17 00:00:00 2001 From: megemini Date: Fri, 22 Sep 2023 19:04:23 +0800 Subject: [PATCH 1/4] [Change] xdoctest part 1 --- paddle/fluid/pybind/eager_properties.cc | 183 ++++++----- python/paddle/base/dataset.py | 9 +- python/paddle/base/framework.py | 36 ++- python/paddle/base/reader.py | 386 ++++++++++++------------ 4 files changed, 330 insertions(+), 284 deletions(-) diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index 517c210830022..0ab91d632e4fc 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -51,12 +51,14 @@ Tensor's name. Examples: .. code-block:: python - import paddle - - x = paddle.to_tensor(1.) - print(x.name) # generated_tensor_0 - x.name = 'test_tensor_name' - print(x.name) # test_tensor_name + >>> import paddle + + >>> x = paddle.to_tensor(1.) + >>> print(x.name) + generated_tensor_0 + >>> x.name = 'test_tensor_name' + >>> print(x.name) + test_tensor_name )DOC"); PyObject* tensor_properties_get_name(TensorObject* self, void* closure) { @@ -84,10 +86,11 @@ Tensor's type. Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor(1.) - print(x.type) # VarType.LOD_TENSOR + >>> x = paddle.to_tensor(1.) + >>> print(x.type) + VarType.LOD_TENSOR )DOC"); PyObject* tensor_properties_get_type(TensorObject* self, void* closure) { @@ -123,20 +126,27 @@ For the Tensor whose stop_gradient is ``False`` , it will be leaf Tensor too if Examples: .. code-block:: python - import paddle + >>> import paddle + + >>> x = paddle.to_tensor(1.) + >>> print(x.is_leaf) + True + + >>> x = paddle.to_tensor(1., stop_gradient=True) + >>> y = x + 1 + >>> print(x.is_leaf) + True - x = paddle.to_tensor(1.) - print(x.is_leaf) # True + >>> print(y.is_leaf) + True - x = paddle.to_tensor(1., stop_gradient=True) - y = x + 1 - print(x.is_leaf) # True - print(y.is_leaf) # True + >>> x = paddle.to_tensor(1., stop_gradient=False) + >>> y = x + 1 + >>> print(x.is_leaf) + True - x = paddle.to_tensor(1., stop_gradient=False) - y = x + 1 - print(x.is_leaf) # True - print(y.is_leaf) # False + >>> print(y.is_leaf) + False )DOC"); PyObject* tensor_properties_is_leaf(TensorObject* self, void* closure) { @@ -165,12 +175,15 @@ Tensor's stop_gradient. Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor(1.) - print(x.stop_gradient) # True - x.stop_gradient = False - print(x.stop_gradient) # False + >>> x = paddle.to_tensor(1.) + >>> print(x.stop_gradient) + True + + >>> x.stop_gradient = False + >>> print(x.stop_gradient) + False )DOC"); PyObject* tensor_properties_get_stop_gradient(TensorObject* self, @@ -192,14 +205,25 @@ Tensor's self. Examples: .. code-block:: python - import paddle + >>> import paddle + + >>> x = paddle.to_tensor(1.) + >>> print(x) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 1.) - x = paddle.to_tensor(1.) - print(x) - print(x.data) - x.data = paddle.to_tensor(2.) - print(x) - print(x.data) + >>> print(x.data) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 1.) + + >>> x.data = paddle.to_tensor(2.) + >>> print(x) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) + + >>> print(x.data) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True, + 2.) )DOC"); PyObject* tensor_properties_get_data(TensorObject* self, void* closure) { EAGER_TRY @@ -234,14 +258,19 @@ Tensor's grad Tensor. Examples: .. 
code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor(1.0, stop_gradient=False) - y = x**2 - y.backward() - print(x.grad) - x.grad = paddle.to_tensor(3.0) - print(x.grad) + >>> x = paddle.to_tensor(1.0, stop_gradient=False) + >>> y = x**2 + >>> y.backward() + >>> print(x.grad) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=False, + 2.) + + >>> x.grad = paddle.to_tensor(3.0) + >>> print(x.grad) + Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=False, + 3.) )DOC"); PyObject* tensor_properties_get_grad(TensorObject* self, void* closure) { EAGER_TRY @@ -320,12 +349,15 @@ Tensor's persistable. Examples: .. code-block:: python - import paddle + >>> import paddle + + >>> x = paddle.to_tensor(1.0, stop_gradient=False) + >>> print(x.persistable) + False - x = paddle.to_tensor(1.0, stop_gradient=False) - print(x.persistable) # False - x. persistable = True - print(x.persistable) # True + >>> x. persistable = True + >>> print(x.persistable) + True )DOC"); PyObject* tensor_properties_get_persistable(TensorObject* self, void* closure) { @@ -356,17 +388,18 @@ Get dist_attr property from shard tensor. Examples: .. code-block:: python - import paddle - import paddle.distributed as dist + >>> # doctest: +REQUIRES(env:DISTRIBUTED) + >>> import paddle + >>> import paddle.distributed as dist - mesh = dist.ProcessMesh([[2, 4, 5], [0, 1, 3]], dim_names=["x", "y"]) - dist_attr = dist.DistAttr(mesh=mesh, sharding_specs=['x', 'y']) + >>> mesh = dist.ProcessMesh([[2, 4, 5], [0, 1, 3]], dim_names=["x", "y"]) + >>> dist_attr = dist.DistAttr(mesh=mesh, sharding_specs=['x', 'y']) - a = paddle.to_tensor([[1,2,3], - [5,6,7]]) - d_tensor = dist.shard_tensor(a, dist_attr=dist_attr) + >>> a = paddle.to_tensor([[1,2,3], + ... [5,6,7]]) + >>> d_tensor = dist.shard_tensor(a, dist_attr=dist_attr) - print(d_tensor.dist_attr) + >>> print(d_tensor.dist_attr) )DOC"); @@ -421,10 +454,11 @@ Tensor's shape. Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor(1.0, stop_gradient=False) - print(x.shape) + >>> x = paddle.to_tensor(1.0, stop_gradient=False) + >>> print(x.shape) + [] )DOC"); PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) { @@ -507,11 +541,12 @@ Tensor's strides. Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([1, 2, 3]) - y = x[1] - print(y.strides) + >>> x = paddle.to_tensor([1, 2, 3]) + >>> y = x[1] + >>> print(y.strides) + [] )DOC"); PyObject* tensor_properties_get_strides(TensorObject* self, void* closure) { @@ -544,11 +579,12 @@ The address of the first element relative to the offset of the video memory. Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([1, 2, 3]) - y = x[1] - print(y.offset) + >>> x = paddle.to_tensor([1, 2, 3]) + >>> y = x[1] + >>> print(y.offset) + 8 )DOC"); PyObject* tensor_properties_get_offset(TensorObject* self, void* closure) { EAGER_TRY @@ -579,10 +615,11 @@ Tensor's memory layout. Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([1, 2, 3]) - print(x.layout) + >>> x = paddle.to_tensor([1, 2, 3]) + >>> print(x.layout) + NCHW )DOC"); PyObject* tensor_properties_get_layout(TensorObject* self, void* closure) { EAGER_TRY @@ -613,10 +650,11 @@ The device Tensor's memory locate. Examples: .. 
code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([1, 2, 3]) - print(x.place) + >>> x = paddle.to_tensor([1, 2, 3]) + >>> print(x.place) + Place(cpu) )DOC"); PyObject* tensor_properties_get_place(TensorObject* self, void* closure) { EAGER_TRY @@ -643,10 +681,11 @@ Tensor's data type. Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([1, 2, 3]) - print(x.dtype) + >>> x = paddle.to_tensor([1, 2, 3]) + >>> print(x.dtype) + paddle.int64 )DOC"); PyObject* tensor_properties_get_dtype(TensorObject* self, void* closure) { EAGER_TRY diff --git a/python/paddle/base/dataset.py b/python/paddle/base/dataset.py index c15f6e8e6e68a..252ac2803be66 100644 --- a/python/paddle/base/dataset.py +++ b/python/paddle/base/dataset.py @@ -145,9 +145,9 @@ def set_fea_eval(self, record_candidate_size, fea_eval=True): Examples: .. code-block:: python - import paddle.base as base - dataset = base.DatasetFactory().create_dataset("InMemoryDataset") - dataset.set_fea_eval(1000000, True) + >>> import paddle.base as base + >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> dataset.set_fea_eval(1000000, True) """ if fea_eval: @@ -1089,7 +1089,6 @@ def set_graph_config(self, config): Examples: .. code-block:: python - >>> # doctest: +SKIP >>> import paddle.base as base >>> from paddle.incubate.distributed.fleet.parameter_server.pslib import fleet >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") @@ -1441,7 +1440,7 @@ def slots_shuffle(self, slots): .. code-block:: python >>> import paddle.base as base - >>> dataset = base.DatasetFactory().create_dataset("InMemoryDataset") + >>> dataset = base.DatasetFactory().create_dataset("BoxPSDataset") >>> dataset.set_merge_by_lineid() >>> #suppose there is a slot 0 >>> dataset.slots_shuffle(['0']) diff --git a/python/paddle/base/framework.py b/python/paddle/base/framework.py index 26cfadb44216d..dd4eae82a7b60 100644 --- a/python/paddle/base/framework.py +++ b/python/paddle/base/framework.py @@ -997,10 +997,11 @@ def cuda_pinned_places(device_count=None): Examples: .. code-block:: python - import paddle.base as base - cuda_pinned_places_cpu_num = base.cuda_pinned_places() - # or - cuda_pinned_places = base.cuda_pinned_places(1) + >>> # doctest: +REQUIRES(env:GPU) + >>> import paddle.base as base + >>> cuda_pinned_places_cpu_num = base.cuda_pinned_places() + >>> # or + >>> cuda_pinned_places = base.cuda_pinned_places(1) """ assert core.is_compiled_with_cuda(), "Not compiled with CUDA" @@ -1929,6 +1930,7 @@ def stop_gradient(self): Examples: .. code-block:: python + >>> import paddle >>> import paddle.base as base >>> import numpy as np @@ -1936,18 +1938,18 @@ def stop_gradient(self): ... value0 = np.arange(26).reshape(2, 13).astype("float32") ... value1 = np.arange(6).reshape(2, 3).astype("float32") ... value2 = np.arange(10).reshape(2, 5).astype("float32") - ... linear = base.Linear(13, 5, dtype="float32") - ... linear2 = base.Linear(3, 3, dtype="float32") + ... linear = paddle.nn.Linear(13, 5) + ... linear2 = paddle.nn.Linear(3, 3) ... a = base.dygraph.to_variable(value0) ... b = base.dygraph.to_variable(value1) ... c = base.dygraph.to_variable(value2) ... out1 = linear(a) ... out2 = linear2(b) ... out1.stop_gradient = True - ... out = base.layers.concat(input=[out1, out2, c], axis=1) + ... out = paddle.concat(x=[out1, out2, c], axis=1) ... out.backward() ... assert linear.weight.gradient() is None - ... assert (out1.gradient() == 0).all() + ... 
assert out1.gradient() is None """ return self.desc.stop_gradient() @@ -1994,6 +1996,7 @@ def is_parameter(self): .. code-block:: python >>> import paddle + >>> paddle.enable_static() >>> new_parameter = paddle.static.create_parameter(name="X", ... shape=[10, 23, 48], ... dtype='float32') @@ -2846,10 +2849,15 @@ class Operator: Examples: .. code-block:: python + >>> import paddle + >>> paddle.enable_static() >>> import paddle.base as base >>> cur_program = base.Program() >>> cur_block = cur_program.current_block() - >>> # var1 += var2 + var3 + >>> var1 = cur_block.create_var(name="var1", shape=[-1, 23, 48], dtype='float32') + >>> var2 = cur_block.create_var(name="var2", shape=[-1, 23, 48], dtype='float32') + >>> var3 = cur_block.create_var(name="var3", shape=[-1, 23, 48], dtype='float32') + >>> var1 += var2 + var3 >>> cur_block.append_op(type="sum", ... inputs={"X": [var1, var2, var3]}, ... outputs={"Out": [var1]}) @@ -3197,6 +3205,8 @@ def _to_readable_code(self, skip_op_callstack=True): Examples: .. code-block:: python + >>> import paddle + >>> paddle.enable_static() >>> import paddle.base as base >>> cur_program = base.Program() @@ -3928,6 +3938,8 @@ class Block: Examples: .. code-block:: python + >>> import paddle + >>> paddle.enable_static() >>> import paddle.base as base >>> cur_program = base.Program() @@ -3967,6 +3979,8 @@ def _to_readable_code(self, skip_op_callstack=True): Examples: .. code-block:: python + >>> import paddle + >>> paddle.enable_static() >>> import paddle.base as base >>> cur_program = base.Program() @@ -7278,9 +7292,9 @@ def to_string(self, throw_on_error, with_details=False): Examples: .. code-block:: python - >>> import paddle.base as base >>> import paddle - + >>> import paddle.base as base + >>> paddle.enable_static() >>> prog = base.default_main_program() >>> rlt = paddle.static.data("fake_data", shape=[-1,1,1], dtype='float32') >>> debug_str = prog.to_string(throw_on_error=True, with_details=False) diff --git a/python/paddle/base/reader.py b/python/paddle/base/reader.py index e749e707b65c6..8c2ddd16961da 100644 --- a/python/paddle/base/reader.py +++ b/python/paddle/base/reader.py @@ -217,199 +217,197 @@ def from_generator( Returns: loader (DataLoader): the created DataLoader object. - Examples 1: - + Examples: .. code-block:: python + :name: example_1 - ''' - Example in static graph mode - ''' - import numpy as np - - import paddle - import paddle.static as static - import paddle.nn.functional as F - - - BATCH_NUM = 10 - BATCH_SIZE = 16 - EPOCH_NUM = 4 - - CLASS_NUM = 10 - - ITERABLE = True # whether the created DataLoader object is iterable - USE_GPU = False # whether to use GPU - - DATA_FORMAT = 'batch_generator' # data format of data source user provides - - paddle.enable_static() - - def simple_net(image, label): - fc_tmp = static.nn.fc(image, size=CLASS_NUM) - cross_entropy = F.softmax_with_cross_entropy(image, label) - loss = paddle.mean(cross_entropy) - sgd = paddle.optimizer.SGD(learning_rate=1e-3) - sgd.minimize(loss) - return loss - - def get_random_images_and_labels(image_shape, label_shape): - image = np.random.random(size=image_shape).astype('float32') - label = np.random.random(size=label_shape).astype('int64') - return image, label - - # If the data generator yields one sample each time, - # use DataLoader.set_sample_generator to set the data source. 
- def sample_generator_creator(): - def __reader__(): - for _ in range(BATCH_NUM * BATCH_SIZE): - image, label = get_random_images_and_labels([784], [1]) - yield image, label - - return __reader__ - - # If the data generator yield list of samples each time, - # use DataLoader.set_sample_list_generator to set the data source. - def sample_list_generator_creator(): - def __reader__(): - for _ in range(BATCH_NUM): - sample_list = [] - for _ in range(BATCH_SIZE): - image, label = get_random_images_and_labels([784], [1]) - sample_list.append([image, label]) - - yield sample_list - - return __reader__ - - # If the data generator yields a batch each time, - # use DataLoader.set_batch_generator to set the data source. - def batch_generator_creator(): - def __reader__(): - for _ in range(BATCH_NUM): - batch_image, batch_label = get_random_images_and_labels([BATCH_SIZE, 784], [BATCH_SIZE, 1]) - yield batch_image, batch_label - - return __reader__ - - # If DataLoader is iterable, use for loop to train the network - def train_iterable(exe, prog, loss, loader): - for _ in range(EPOCH_NUM): - for data in loader(): - exe.run(prog, feed=data, fetch_list=[loss]) - - # If DataLoader is not iterable, use start() and reset() method to control the process - def train_non_iterable(exe, prog, loss, loader): - for _ in range(EPOCH_NUM): - loader.start() # call DataLoader.start() before each epoch starts - try: - while True: - exe.run(prog, fetch_list=[loss]) - except paddle.core.EOFException: - loader.reset() # call DataLoader.reset() after catching EOFException - - def set_data_source(loader, places): - if DATA_FORMAT == 'sample_generator': - loader.set_sample_generator(sample_generator_creator(), batch_size=BATCH_SIZE, drop_last=True, places=places) - elif DATA_FORMAT == 'sample_list_generator': - loader.set_sample_list_generator(sample_list_generator_creator(), places=places) - elif DATA_FORMAT == 'batch_generator': - loader.set_batch_generator(batch_generator_creator(), places=places) - else: - raise ValueError('Unsupported data format') - - image = static.data(name='image', shape=[None, 784], dtype='float32') - label = static.data(name='label', shape=[None, 1], dtype='int64') - - # Define DataLoader - loader = paddle.base.io.DataLoader.from_generator(feed_list=[image, label], capacity=16, iterable=ITERABLE) - - # Define network - loss = simple_net(image, label) - - places = static.cuda_places() if USE_GPU else static.cpu_places() - set_data_source(loader, places) - - exe = static.Executor(places[0]) - exe.run(static.default_startup_program()) - - prog = static.CompiledProgram(static.default_main_program()) - if loader.iterable: - train_iterable(exe, prog, loss, loader) - else: - train_non_iterable(exe, prog, loss, loader) + >>> # Example in static graph mode + >>> import numpy as np - Examples 2: + >>> import paddle + >>> import paddle.static as static + >>> import paddle.nn.functional as F - .. code-block:: python - ''' - Example in dynamic graph mode. 
- ''' - import numpy as np + >>> BATCH_NUM = 10 + >>> BATCH_SIZE = 16 + >>> EPOCH_NUM = 4 + + >>> CLASS_NUM = 10 + + >>> ITERABLE = True # whether the created DataLoader object is iterable + >>> USE_GPU = False # whether to use GPU - import paddle - import paddle.nn as nn - import paddle.optimizer as opt - import paddle.distributed as dist + >>> DATA_FORMAT = 'batch_generator' # data format of data source user provides - BATCH_SIZE = 16 - BATCH_NUM = 4 - EPOCH_NUM = 4 + >>> paddle.enable_static() - IMAGE_SIZE = 784 - CLASS_NUM = 10 + >>> def simple_net(image, label): + ... fc_tmp = static.nn.fc(image, size=CLASS_NUM) + ... cross_entropy = F.softmax_with_cross_entropy(image, label) + ... loss = paddle.mean(cross_entropy) + ... sgd = paddle.optimizer.SGD(learning_rate=1e-3) + ... sgd.minimize(loss) + ... return loss + ... + >>> def get_random_images_and_labels(image_shape, label_shape): + ... image = np.random.random(size=image_shape).astype('float32') + ... label = np.random.random(size=label_shape).astype('int64') + ... return image, label + ... + >>> # If the data generator yields one sample each time, + >>> # use DataLoader.set_sample_generator to set the data source. + >>> def sample_generator_creator(): + ... def __reader__(): + ... for _ in range(BATCH_NUM * BATCH_SIZE): + ... image, label = get_random_images_and_labels([784], [1]) + ... yield image, label + ... + ... return __reader__ + ... + >>> # If the data generator yield list of samples each time, + >>> # use DataLoader.set_sample_list_generator to set the data source. + >>> def sample_list_generator_creator(): + ... def __reader__(): + ... for _ in range(BATCH_NUM): + ... sample_list = [] + ... for _ in range(BATCH_SIZE): + ... image, label = get_random_images_and_labels([784], [1]) + ... sample_list.append([image, label]) + ... + ... yield sample_list + ... + ... return __reader__ + ... + >>> # If the data generator yields a batch each time, + >>> # use DataLoader.set_batch_generator to set the data source. + >>> def batch_generator_creator(): + ... def __reader__(): + ... for _ in range(BATCH_NUM): + ... batch_image, batch_label = get_random_images_and_labels([BATCH_SIZE, 784], [BATCH_SIZE, 1]) + ... yield batch_image, batch_label + ... + ... return __reader__ + ... + >>> # If DataLoader is iterable, use for loop to train the network + >>> def train_iterable(exe, prog, loss, loader): + ... for _ in range(EPOCH_NUM): + ... for data in loader(): + ... exe.run(prog, feed=data, fetch_list=[loss]) + ... + >>> # If DataLoader is not iterable, use start() and reset() method to control the process + >>> def train_non_iterable(exe, prog, loss, loader): + ... for _ in range(EPOCH_NUM): + ... loader.start() # call DataLoader.start() before each epoch starts + ... try: + ... while True: + ... exe.run(prog, fetch_list=[loss]) + ... except paddle.core.EOFException: + ... loader.reset() # call DataLoader.reset() after catching EOFException + ... + >>> def set_data_source(loader, places): + ... if DATA_FORMAT == 'sample_generator': + ... loader.set_sample_generator(sample_generator_creator(), batch_size=BATCH_SIZE, drop_last=True, places=places) + ... elif DATA_FORMAT == 'sample_list_generator': + ... loader.set_sample_list_generator(sample_list_generator_creator(), places=places) + ... elif DATA_FORMAT == 'batch_generator': + ... loader.set_batch_generator(batch_generator_creator(), places=places) + ... else: + ... raise ValueError('Unsupported data format') + ... 
+ >>> image = static.data(name='image', shape=[None, 784], dtype='float32') + >>> label = static.data(name='label', shape=[None, 1], dtype='int64') - USE_GPU = False # whether to use GPU + >>> # Define DataLoader + >>> loader = paddle.base.io.DataLoader.from_generator(feed_list=[image, label], capacity=16, iterable=ITERABLE) - def _get_random_images_and_labels(image_shape, label_shape): - image = np.random.random(size=image_shape).astype('float32') - label = np.random.random(size=label_shape).astype('int64') - return image, label + >>> # Define network + >>> loss = simple_net(image, label) - def __reader__(): - for _ in range(BATCH_NUM): - batch_image, batch_label = _get_random_images_and_labels( - [BATCH_SIZE, IMAGE_SIZE], [BATCH_SIZE, CLASS_NUM]) - yield batch_image, batch_label + >>> places = static.cuda_places() if USE_GPU else static.cpu_places() + >>> set_data_source(loader, places) - def random_batch_reader(): - return __reader__ + >>> exe = static.Executor(places[0]) + >>> exe.run(static.default_startup_program()) - class LinearNet(nn.Layer): - def __init__(self): - super().__init__() - self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) + >>> prog = static.CompiledProgram(static.default_main_program()) + >>> if loader.iterable: + ... train_iterable(exe, prog, loss, loader) + >>> else: + ... train_non_iterable(exe, prog, loss, loader) - @paddle.jit.to_static - def forward(self, x): - return self._linear(x) + .. code-block:: python + :name: example_2 - # set device - paddle.set_device('gpu' if USE_GPU else 'cpu') + >>> # Example in dynamic graph mode. - # create network - layer = LinearNet() - dp_layer = paddle.DataParallel(layer) - loss_fn = nn.CrossEntropyLoss() - adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters()) + >>> import numpy as np - # create data loader - loader = paddle.base.io.DataLoader.from_generator(capacity=5) - loader.set_batch_generator(random_batch_reader()) + >>> import paddle + >>> import paddle.nn as nn + >>> import paddle.optimizer as opt + >>> import paddle.distributed as dist - for epoch_id in range(EPOCH_NUM): - for batch_id, (image, label) in enumerate(loader()): - out = layer(image) - loss = loss_fn(out, label) + >>> BATCH_SIZE = 16 + >>> BATCH_NUM = 4 + >>> EPOCH_NUM = 4 - loss.backward() + >>> IMAGE_SIZE = 784 + >>> CLASS_NUM = 10 - adam.step() - adam.clear_grad() - print("Epoch {} batch {}: loss = {}".format( - epoch_id, batch_id, np.mean(loss.numpy()))) + >>> USE_GPU = False # whether to use GPU + >>> def _get_random_images_and_labels(image_shape): + ... image = np.random.random(size=image_shape).astype('float32') + ... label = np.random.randint(0, CLASS_NUM, size=BATCH_SIZE).astype('int64') + ... return image, label + ... + >>> def __reader__(): + ... for _ in range(BATCH_NUM): + ... batch_image, batch_label = _get_random_images_and_labels( + ... [BATCH_SIZE, IMAGE_SIZE]) + ... yield batch_image, batch_label + ... + >>> def random_batch_reader(): + ... return __reader__ + ... + >>> class LinearNet(nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) + ... + ... @paddle.jit.to_static + ... def forward(self, x): + ... return self._linear(x) + ... 
+ >>> # set device + >>> paddle.set_device('gpu' if USE_GPU else 'cpu') + + >>> # doctest: +SKIP('`paddle.jit.to_static` can not run in xdoctest') + >>> # create network + >>> layer = LinearNet() + >>> dp_layer = paddle.DataParallel(layer) + >>> loss_fn = nn.CrossEntropyLoss() + >>> adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters()) + + >>> # create data loader + >>> loader = paddle.base.io.DataLoader.from_generator(capacity=5) + >>> loader.set_batch_generator(random_batch_reader()) + + >>> for epoch_id in range(EPOCH_NUM): + ... for batch_id, (image, label) in enumerate(loader()): + ... out = layer(image) + ... loss = loss_fn(out, label) + ... + ... loss.backward() + ... + ... adam.step() + ... adam.clear_grad() + ... print("Epoch {} batch {}: loss = {}".format( + ... epoch_id, batch_id, np.mean(loss.numpy()))) + ... + >>> # doctest: -SKIP """ if in_dygraph_mode(): return DygraphGeneratorLoader( @@ -1154,6 +1152,7 @@ class PyReader(DataLoaderBase): the reader manually. .. code-block:: python + :name: example_1 >>> import paddle >>> import paddle.base as base @@ -1172,7 +1171,7 @@ class PyReader(DataLoaderBase): ... input=predict, label=label, ... reduction='none', use_softmax=False ... ) - ... + >>> def reader_creator_random_image_and_label(height, width): ... def reader(): ... for i in range(ITER_NUM): @@ -1182,14 +1181,14 @@ class PyReader(DataLoaderBase): ... fake_label = np.ones([1]) ... yield fake_image, fake_label ... return reader - ... + >>> image = paddle.static.data(name='image', shape=[None, 784, 784], dtype='float32') >>> label = paddle.static.data(name='label', shape=[None, 1], dtype='int64') >>> reader = base.io.PyReader(feed_list=[image, label], ... capacity=4, ... iterable=False) - ... + >>> user_defined_reader = reader_creator_random_image_and_label(784, 784) >>> reader.decorate_sample_list_generator( ... paddle.batch(user_defined_reader, batch_size=BATCH_SIZE)) @@ -1204,7 +1203,6 @@ class PyReader(DataLoaderBase): ... except base.core.EOFException: ... reader.reset() ... break - ... 2. If iterable=True, the created PyReader object is decoupled with the program. No operator would be inserted into the program. @@ -1213,6 +1211,7 @@ class PyReader(DataLoaderBase): object into :code:`Executor.run(feed=...)`. .. code-block:: python + :name: example_2 >>> import paddle >>> import paddle.base as base @@ -1231,7 +1230,7 @@ class PyReader(DataLoaderBase): ... input=predict, label=label, ... reduction='none', use_softmax=False ... ) - ... + >>> def reader_creator_random_image(height, width): ... def reader(): ... for i in range(ITER_NUM): @@ -1239,7 +1238,7 @@ class PyReader(DataLoaderBase): ... fake_label = np.ones([1]) ... yield fake_image, fake_label ... return reader - ... + >>> image = paddle.static.data(name='image', shape=[None, 784, 784], dtype='float32') >>> label = paddle.static.data(name='label', shape=[None, 1], dtype='int64') >>> reader = base.io.PyReader(feed_list=[image, label], capacity=4, iterable=True, return_list=False) @@ -1248,7 +1247,7 @@ class PyReader(DataLoaderBase): >>> reader.decorate_sample_list_generator( ... paddle.batch(user_defined_reader, batch_size=BATCH_SIZE), ... base.core.CPUPlace()) - ... + >>> loss = network(image, label) >>> executor = base.Executor(base.CPUPlace()) >>> executor.run(base.default_startup_program()) @@ -1256,12 +1255,12 @@ class PyReader(DataLoaderBase): >>> for _ in range(EPOCH_NUM): ... for data in reader(): ... executor.run(feed=data, fetch_list=[loss]) - ... 3. 
If return_list=True, the return values would be presented as list instead of dict. This is usually used in dygraph mode. .. code-block:: python + :name: example_3 >>> import paddle >>> import paddle.base as base @@ -1276,7 +1275,7 @@ class PyReader(DataLoaderBase): ... yield np.random.uniform(low=0, high=255, size=[height, width]), \ ... np.random.random_integers(low=0, high=9, size=[1]) ... return reader - ... + >>> place = base.CPUPlace() >>> with base.dygraph.guard(place): ... py_reader = base.io.PyReader(capacity=2, return_list=True) @@ -1333,12 +1332,12 @@ def start(self): >>> def generator(): ... for i in range(5): ... yield np.random.uniform(low=0, high=255, size=[784, 784]), - ... + >>> image = paddle.static.data(name='image', shape=[None, 784, 784], dtype='float32') >>> reader = base.io.PyReader(feed_list=[image], capacity=4, iterable=False) >>> reader.decorate_sample_list_generator( ... paddle.batch(generator, batch_size=BATCH_SIZE)) - ... + >>> executor = base.Executor(base.CPUPlace()) >>> executor.run(base.default_startup_program()) >>> for i in range(3): @@ -1349,7 +1348,6 @@ def start(self): ... except base.core.EOFException: ... reader.reset() ... break - ... ''' self._loader.start() @@ -1372,12 +1370,12 @@ def reset(self): >>> def generator(): ... for i in range(5): ... yield np.random.uniform(low=0, high=255, size=[784, 784]), - ... + >>> image = paddle.static.data(name='image', shape=[None, 784, 784], dtype='float32') >>> reader = base.io.PyReader(feed_list=[image], capacity=4, iterable=False) >>> reader.decorate_sample_list_generator( ... paddle.batch(generator, batch_size=BATCH_SIZE)) - ... + >>> executor = base.Executor(base.CPUPlace()) >>> executor.run(base.default_startup_program()) >>> for i in range(3): @@ -1388,7 +1386,6 @@ def reset(self): ... except base.core.EOFException: ... reader.reset() ... break - ... ''' self._loader.reset() @@ -1435,7 +1432,7 @@ def decorate_sample_generator( ... input=predict, label=label, ... reduction='none', use_softmax=False ... ) - ... + >>> def random_image_and_label_generator(height, width): ... def generator(): ... for i in range(ITER_NUM): @@ -1445,7 +1442,7 @@ def decorate_sample_generator( ... fake_label = np.array([1]) ... yield fake_image, fake_label ... return generator - ... + >>> image = paddle.static.data(name='image', shape=[None, 784, 784], dtype='float32') >>> label = paddle.static.data(name='label', shape=[None, 1], dtype='int64') >>> reader = base.io.PyReader(feed_list=[image, label], capacity=4, iterable=True) @@ -1461,7 +1458,6 @@ def decorate_sample_generator( >>> for _ in range(EPOCH_NUM): ... for data in reader(): ... executor.run(feed=data, fetch_list=[loss]) - ... ''' self._loader.set_sample_generator( sample_generator, batch_size, drop_last, places @@ -1502,7 +1498,7 @@ def decorate_sample_list_generator(self, reader, places=None): ... input=predict, label=label, ... reduction='none', use_softmax=False ... ) - ... + >>> def random_image_and_label_generator(height, width): ... def generator(): ... for i in range(ITER_NUM): @@ -1512,7 +1508,7 @@ def decorate_sample_list_generator(self, reader, places=None): ... fake_label = np.ones([1]) ... yield fake_image, fake_label ... return generator - ... 
+ >>> image = paddle.static.data(name='image', shape=[None, 784, 784], dtype='float32') >>> label = paddle.static.data(name='label', shape=[None, 1], dtype='int64') >>> reader = base.io.PyReader(feed_list=[image, label], capacity=4, iterable=True) @@ -1521,7 +1517,7 @@ def decorate_sample_list_generator(self, reader, places=None): >>> reader.decorate_sample_list_generator( ... paddle.batch(user_defined_generator, batch_size=BATCH_SIZE), ... base.core.CPUPlace()) - ... + >>> loss = network(image, label) >>> executor = base.Executor(base.core.CPUPlace()) >>> executor.run(base.default_startup_program()) @@ -1529,7 +1525,6 @@ def decorate_sample_list_generator(self, reader, places=None): >>> for _ in range(EPOCH_NUM): ... for data in reader(): ... executor.run(feed=data, fetch_list=[loss]) - ... ''' self._loader.set_sample_list_generator(reader, places) @@ -1568,7 +1563,7 @@ def decorate_batch_generator(self, reader, places=None): ... input=predict, label=label, ... reduction='none', use_softmax=False ... ) - ... + >>> def random_image_and_label_generator(height, width): ... def generator(): ... for i in range(ITER_NUM): @@ -1580,7 +1575,7 @@ def decorate_batch_generator(self, reader, places=None): ... batch_label = batch_label.astype('int64') ... yield batch_image, batch_label ... return generator - ... + >>> image = paddle.static.data(name='image', shape=[None, 784, 784], dtype='float32') >>> label = paddle.static.data(name='label', shape=[None, 1], dtype='int64') >>> reader = base.io.PyReader(feed_list=[image, label], capacity=4, iterable=True) @@ -1595,7 +1590,6 @@ def decorate_batch_generator(self, reader, places=None): >>> for _ in range(EPOCH_NUM): ... for data in reader(): ... executor.run(feed=data, fetch_list=[loss]) - ... ''' self._loader.set_batch_generator(reader, places) From 91ecc380c3a262d356cf626af46a97b06ab15e37 Mon Sep 17 00:00:00 2001 From: megemini Date: Sun, 24 Sep 2023 15:10:23 +0800 Subject: [PATCH 2/4] [Change] fix code-block --- python/paddle/decomposition/register.py | 13 +- .../paddle/distributed/auto_parallel/api.py | 70 +- .../fleet/base/distributed_strategy.py | 638 +++++++++--------- .../distributed/fleet/base/role_maker.py | 34 +- 4 files changed, 384 insertions(+), 371 deletions(-) diff --git a/python/paddle/decomposition/register.py b/python/paddle/decomposition/register.py index ba8adc54f6562..5d976f2d8e0b3 100644 --- a/python/paddle/decomposition/register.py +++ b/python/paddle/decomposition/register.py @@ -50,13 +50,14 @@ def register_decomp(op_type): Examples: .. code-block:: python - @register_decomp('softmax') - def softmax(x, axis): - molecular = exp(x) - denominator = broadcast_to(sum(molecular, axis=axis, keepdim=True), x.shape) - res = divide(molecular, denominator) - return res + >>> from paddle.decomposition import register + >>> @register.register_decomp('softmax') + >>> def softmax(x, axis): + ... molecular = exp(x) + ... denominator = broadcast_to(sum(molecular, axis=axis, keepdim=True), x.shape) + ... res = divide(molecular, denominator) + ... 
return res """ if not isinstance(op_type, str): raise TypeError(f'op_type must be str, but got {type(op_type)}.') diff --git a/python/paddle/distributed/auto_parallel/api.py b/python/paddle/distributed/auto_parallel/api.py index c62e1ebf0b66b..0865e52cad7a0 100644 --- a/python/paddle/distributed/auto_parallel/api.py +++ b/python/paddle/distributed/auto_parallel/api.py @@ -34,16 +34,16 @@ class DistAttr(core.TensorDistAttr): sharding_specs(list[str|None]): The specification describing how to shard the Tensor. Examples: + .. code-block:: python - .. code-block:: python + >>> import paddle + >>> import paddle.distributed as dist - import paddle - import paddle.distributed as dist + >>> mesh = dist.ProcessMesh([[2, 4, 5], [0, 1, 3]], dim_names=['x', 'y']) + >>> dist_attr = dist.DistAttr(mesh=mesh, sharding_specs=['x', 'y']) - mesh = dist.ProcessMesh([[2, 4, 5], [0, 1, 3]], dim_names=["x", "y"]) - dist_attr = dist.DistAttr(mesh=mesh, sharding_specs=['x', 'y']) + >>> print(dist_attr) - print(dist_attr) """ def __init__(self, mesh, sharding_specs): @@ -109,22 +109,24 @@ def shard_tensor( Tensor: A Tensor constructed from ``data`` with distributed attributes. Examples: + .. code-block:: python - .. code-block:: python + >>> import paddle + >>> import paddle.distributed as dist - import paddle - import paddle.distributed as dist + >>> mesh = dist.ProcessMesh([[2, 4, 5], [0, 1, 3]], dim_names=['x', 'y']) + >>> dist_attr = dist.DistAttr(mesh=mesh, sharding_specs=['x', 'y']) - mesh = dist.ProcessMesh([[2, 4, 5], [0, 1, 3]], dim_names=["x", "y"]) - dist_attr = dist.DistAttr(mesh=mesh, sharding_specs=['x', 'y']) + >>> # dense tensor + >>> a = paddle.to_tensor([[1,2,3], + ... [5,6,7]]) - # dense tensor - a = paddle.to_tensor([[1,2,3], - [5,6,7]]) - # distributed tensor - d_tensor = dist.shard_tensor(a, dist_attr=dist_attr) + >>> # doctest: +REQUIRES(env:DISTRIBUTED) + >>> # distributed tensor + >>> d_tensor = dist.shard_tensor(a, dist_attr=dist_attr) + + >>> print(d_tensor) - print(d_tensor) """ # 1. create dense tensor # `paddle.to_tensor` supports both dynamic and static mode @@ -166,7 +168,6 @@ def dtensor_from_fn(fn, dist_attr, *args, **kwargs): Tensor: A Tensor constructed from ``fn`` with distributed attributes. Examples: - .. code-block:: python >>> import paddle @@ -177,6 +178,7 @@ def dtensor_from_fn(fn, dist_attr, *args, **kwargs): >>> # Call the function dtensor_from_fn with dist_attr parameter >>> d_tensor = dist.dtensor_from_fn(paddle.ones, dist_attr=dist_attr, shape=[1]) >>> print(d_tensor) + """ tensor = fn(*args, **kwargs) return shard_tensor(tensor, dist_attr=dist_attr) @@ -194,28 +196,30 @@ def reshard(dist_tensor, dist_attr): Tensor: A Distributed Tensor reshared with distributed attributes. Examples: + .. code-block:: python - .. code-block:: python + >>> import paddle + >>> import paddle.distributed as dist + + >>> mesh = dist.ProcessMesh([[2, 4, 5], [0, 1, 3]], dim_names=['x', 'y']) + >>> dist_attr = dist.DistAttr(mesh=mesh, sharding_specs=['x', 'y']) - import paddle - import paddle.distributed as dist + >>> out_mesh = dist.ProcessMesh([[2, 4, 5], [0, 1, 3]], dim_names=['x', 'y']) + >>> out_dist_attr = dist.DistAttr(mesh=out_mesh, sharding_specs=[None, None]) - mesh = dist.ProcessMesh([[2, 4, 5], [0, 1, 3]], dim_names=["x", "y"]) - dist_attr = dist.DistAttr(mesh=mesh, sharding_specs=['x', 'y']) + >>> # dense tensor + >>> a = paddle.to_tensor([[1,2,3], + ... 
[5,6,7]]) - out_mesh = dist.ProcessMesh([[2, 4, 5], [0, 1, 3]], dim_names=["x", "y"]) - out_dist_attr = dist.DistAttr(mesh=out_mesh, sharding_specs=[None, None]) + >>> # doctest: +REQUIRES(env:DISTRIBUTED) + >>> # distributed tensor + >>> d_tensor = dist.shard_tensor(a, dist_attr=dist_attr) - # dense tensor - a = paddle.to_tensor([[1,2,3], - [5,6,7]]) - # distributed tensor - d_tensor = dist.shard_tensor(a, dist_attr=dist_attr) + >>> out_d_tensor = dist.reshard(d_tensor, out_dist_attr) - out_d_tensor = dist.reshard(d_tensor, out_dist_attr) + >>> print(d_tensor) + >>> print(out_d_tensor) - print(d_tensor) - print(out_d_tensor) """ if paddle.framework.in_dynamic_mode(): diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index 09e2ef82162bc..d659c3ebf24f9 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -174,12 +174,12 @@ def save_to_prototxt(self, output): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.dgc = True - strategy.recompute = True - strategy.recompute_configs = {"checkpoints": ["x"]} - strategy.save_to_prototxt("dist_strategy.prototxt") + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.dgc = True + >>> strategy.recompute = True + >>> strategy.recompute_configs = {"checkpoints": ["x"]} + >>> strategy.save_to_prototxt("dist_strategy.prototxt") """ with open(output, "w") as fout: @@ -193,9 +193,9 @@ def load_from_prototxt(self, pb_file): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.load_from_prototxt("dist_strategy.prototxt") + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.load_from_prototxt("dist_strategy.prototxt") """ with open(pb_file, 'r') as f: @@ -211,14 +211,14 @@ def execution_strategy(self): Examples: .. code-block:: python - import paddle - exe_strategy = paddle.static.ExecutionStrategy() - exe_strategy.num_threads = 10 - exe_strategy.num_iteration_per_drop_scope = 10 - exe_strategy.num_iteration_per_run = 10 + >>> import paddle + >>> exe_strategy = paddle.static.ExecutionStrategy() + >>> exe_strategy.num_threads = 10 + >>> exe_strategy.num_iteration_per_drop_scope = 10 + >>> exe_strategy.num_iteration_per_run = 10 - strategy = paddle.distributed.fleet.DistributedStrategy() - strategy.execution_strategy = exe_strategy + >>> strategy = paddle.distributed.fleet.DistributedStrategy() + >>> strategy.execution_strategy = exe_strategy """ execution_strategy = paddle.static.ExecutionStrategy() @@ -253,19 +253,19 @@ def build_strategy(self): Examples: .. 
code-block:: python - import paddle - build_strategy = paddle.static.BuildStrategy() - build_strategy.enable_sequential_execution = True - build_strategy.fuse_elewise_add_act_ops = True - build_strategy.fuse_bn_act_ops = True - build_strategy.enable_auto_fusion = True - build_strategy.fuse_relu_depthwise_conv = True - build_strategy.fuse_broadcast_ops = True - build_strategy.fuse_all_optimizer_ops = True - build_strategy.enable_inplace = True + >>> import paddle + >>> build_strategy = paddle.static.BuildStrategy() + >>> build_strategy.enable_sequential_execution = True + >>> build_strategy.fuse_elewise_add_act_ops = True + >>> build_strategy.fuse_bn_act_ops = True + >>> build_strategy.enable_auto_fusion = True + >>> build_strategy.fuse_relu_depthwise_conv = True + >>> build_strategy.fuse_broadcast_ops = True + >>> build_strategy.fuse_all_optimizer_ops = True + >>> build_strategy.enable_inplace = True - strategy = paddle.distributed.fleet.DistributedStrategy() - strategy.build_strategy = build_strategy + >>> strategy = paddle.distributed.fleet.DistributedStrategy() + >>> strategy.build_strategy = build_strategy """ @@ -302,9 +302,9 @@ def gradient_scale_configs(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.gradient_scale_configs = {'scale_strategy': 'avg'} + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.gradient_scale_configs = {'scale_strategy': 'avg'} Note that, strategy must be in 'avg', 'sum' or 'customized' @@ -333,15 +333,15 @@ def a_sync(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - role_maker = fleet.PaddleCloudRoleMaker() - fleet.init(role_maker) + >>> import paddle.distributed.fleet as fleet + >>> role_maker = fleet.PaddleCloudRoleMaker() + >>> fleet.init(role_maker) - strategy = fleet.DistributedStrategy() - strategy.a_sync = True # by default this is True + >>> strategy = fleet.DistributedStrategy() + >>> strategy.a_sync = True # by default this is True - # code block for defining loss and local optimizer - # sgd = fleet.distributed_optimizer(optimizer, strategy) + >>> # code block for defining loss and local optimizer + >>> # sgd = fleet.distributed_optimizer(optimizer, strategy) """ return self.strategy.a_sync @@ -385,17 +385,17 @@ def a_sync_configs(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - role_maker = fleet.PaddleCloudRoleMaker() - fleet.init(role_maker) + >>> import paddle.distributed.fleet as fleet + >>> role_maker = fleet.PaddleCloudRoleMaker() + >>> fleet.init(role_maker) - strategy = fleet.DistributedStrategy() - strategy.a_sync = True # by default this is True - configs = {"k_steps": 1024, "send_queue_size": 32} - strategy.a_sync_configs = configs + >>> strategy = fleet.DistributedStrategy() + >>> strategy.a_sync = True # by default this is True + >>> configs = {"k_steps": 1024, "send_queue_size": 32} + >>> strategy.a_sync_configs = configs - # code block for defining loss and local optimizer - # sgd = fleet.distributed_optimizer(optimizer, strategy) + >>> # code block for defining loss and local optimizer + >>> # sgd = fleet.distributed_optimizer(optimizer, strategy) """ return get_msg_dict(self.strategy.a_sync_configs) @@ -426,16 +426,16 @@ def trainer_desc_configs(self): Examples: .. 
code-block:: python - import paddle.distributed.fleet as fleet - role_maker = fleet.PaddleCloudRoleMaker() - fleet.init(role_maker) + >>> import paddle.distributed.fleet as fleet + >>> role_maker = fleet.PaddleCloudRoleMaker() + >>> fleet.init(role_maker) - strategy = fleet.DistributedStrategy() - configs = {"dump_fields_path": "./dump_data", "dump_fields": ["xxx", "yyy"]} - strategy.trainer_desc_configs = configs + >>> strategy = fleet.DistributedStrategy() + >>> configs = {"dump_fields_path": "./dump_data", "dump_fields": ["xxx", "yyy"]} + >>> strategy.trainer_desc_configs = configs - # code block for defining loss and local optimizer - # sgd = fleet.distributed_optimizer(optimizer, strategy) + >>> # code block for defining loss and local optimizer + >>> # sgd = fleet.distributed_optimizer(optimizer, strategy) """ return get_msg_dict(self.strategy.trainer_desc_configs) @@ -450,15 +450,15 @@ def adam_d2sum(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - role_maker = fleet.PaddleCloudRoleMaker() - fleet.init(role_maker) + >>> import paddle.distributed.fleet as fleet + >>> role_maker = fleet.PaddleCloudRoleMaker() + >>> fleet.init(role_maker) - strategy = fleet.DistributedStrategy() - strategy.adam_d2sum = True # by default this is False + >>> strategy = fleet.DistributedStrategy() + >>> strategy.adam_d2sum = True # by default this is False - # code block for defining loss and local optimizer - # sgd = fleet.distributed_optimizer(optimizer, strategy) + >>> # code block for defining loss and local optimizer + >>> # sgd = fleet.distributed_optimizer(optimizer, strategy) """ return self.strategy.adam_d2sum @@ -501,14 +501,14 @@ def fs_client_param(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - role_maker = fleet.PaddleCloudRoleMaker() - fleet.init(role_maker) - strategy = fleet.DistributedStrategy() - configs = {"uri": "xxx", "user": "xxx", passwd: "xxx"} - strategy.fs_client_param = configs - # code block for defining loss and local optimizer - # sgd = fleet.distributed_optimizer(optimizer, strategy) + >>> import paddle.distributed.fleet as fleet + >>> role_maker = fleet.PaddleCloudRoleMaker() + >>> fleet.init(role_maker) + >>> strategy = fleet.DistributedStrategy() + >>> configs = {"uri": "xxx", "user": "xxx", "passwd": "xxx"} + >>> strategy.fs_client_param = configs + >>> # code block for defining loss and local optimizer + >>> # sgd = fleet.distributed_optimizer(optimizer, strategy) """ return self.strategy.fs_client_param @@ -880,11 +880,11 @@ def amp(self): Examples: - .. code-block:: python + .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.amp = True # by default this is false + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.amp = True # by default this is false """ return self.strategy.amp @@ -928,27 +928,29 @@ def amp_configs(self): use_fp16_guard(bool): Whether to use `fp16_guard` when constructing the program. Default True. Only takes effect when `use_pure_fp16` is turned on. - Examples 1: + Examples: .. 
code-block:: python + :name:example_1 - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.amp = True - strategy.amp_configs = { - "init_loss_scaling": 32768, - "custom_white_list": ['conv2d']} + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.amp = True + >>> strategy.amp_configs = { + ... "init_loss_scaling": 32768, + ... "custom_white_list": ['conv2d'] + ... } - Examples 2: .. code-block:: python + :name:example_2 - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.amp = True - # pure fp16 - strategy.amp_configs = { - "init_loss_scaling": 32768, - "use_pure_fp16": True - } + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.amp = True + >>> # pure fp16 + >>> strategy.amp_configs = { + ... "init_loss_scaling": 32768, + ... "use_pure_fp16": True + ... } """ return get_msg_dict(self.strategy.amp_configs) @@ -969,9 +971,9 @@ def asp(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.asp = True # by default this is false + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.asp = True # by default this is false """ return self.strategy.asp @@ -992,11 +994,11 @@ def qat(self): Examples: - .. code-block:: python + .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.qat = True # by default this is false + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.qat = True # by default this is false """ return self.strategy.qat @@ -1019,16 +1021,20 @@ def qat_configs(self): not_quant_pattern(list[str]): When the skip pattern is detected in an op's name scope, the corresponding op will not be quantized. algo(str): Other quantization training algorithm. - Exampless: - .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.qat = True - strategy.qat_configs = { - "channel_wise_abs_max": True, - "weight_bits": 8, - "activation_bits: 8, - "not_quant_pattern": ['skip_quant']} + + Examples: + .. code-block:: python + + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.qat = True + >>> strategy.qat_configs = { + ... "channel_wise_abs_max": True, + ... "weight_bits": 8, + ... "activation_bits": 8, + ... "not_quant_pattern": ['skip_quant'] + ... } + """ return get_msg_dict(self.strategy.qat_configs) @@ -1046,11 +1052,11 @@ def recompute(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.recompute = True - # suppose x and y are names of checkpoint tensors for recomputation - strategy.recompute_configs = {"checkpoints": ["x", "y"]} + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.recompute = True + >>> # suppose x and y are names of checkpoint tensors for recomputation + >>> strategy.recompute_configs = {"checkpoints": ["x", "y"]} """ return self.strategy.recompute @@ -1065,9 +1071,9 @@ def sync_nccl_allreduce(self): Examples: .. 
code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.sync_nccl_allreduce = True + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.sync_nccl_allreduce = True """ return self.strategy.sync_nccl_allreduce @@ -1091,9 +1097,9 @@ def use_hierarchical_allreduce(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.use_hierarchical_allreduce = True + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.use_hierarchical_allreduce = True """ return self.strategy.use_hierarchical_allreduce @@ -1118,9 +1124,9 @@ def hierarchical_allreduce_inter_nranks(self): Example: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.hierarchical_allreduce_inter_nranks = 8 + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.hierarchical_allreduce_inter_nranks = 8 """ return self.strategy.hierarchical_allreduce_inter_nranks @@ -1146,9 +1152,9 @@ def sync_batch_norm(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.sync_batch_norm = True + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.sync_batch_norm = True """ @@ -1172,9 +1178,9 @@ def fuse_all_reduce_ops(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.fuse_all_reduce_ops = False + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.fuse_all_reduce_ops = False """ return self.strategy.fuse_all_reduce_ops @@ -1198,9 +1204,9 @@ def fuse_grad_size_in_MB(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.fuse_grad_size_in_MB = 50 + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.fuse_grad_size_in_MB = 50 """ return self.strategy.fuse_grad_size_in_MB @@ -1226,9 +1232,9 @@ def last_comm_group_size_MB(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.last_comm_group_size_MB = 2 + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.last_comm_group_size_MB = 2 """ return self.strategy.last_comm_group_size_MB @@ -1253,9 +1259,9 @@ def find_unused_parameters(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.find_unused_parameters = True + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.find_unused_parameters = True """ @@ -1296,9 +1302,9 @@ def nccl_comm_num(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.nccl_comm_num = 2 + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.nccl_comm_num = 2 """ @@ -1342,13 +1348,14 @@ def recompute_configs(self): Examples: .. 
code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.recompute = True - strategy.recompute_configs = { - "checkpoints": ["x", "y"], - "enable_offload": True, - "checkpoint_shape": [100, 512, 1024] } + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.recompute = True + >>> strategy.recompute_configs = { + ... "checkpoints": ["x", "y"], + ... "enable_offload": True, + ... "checkpoint_shape": [100, 512, 1024] + ... } """ return get_msg_dict(self.strategy.recompute_configs) @@ -1377,9 +1384,9 @@ def sharding(self): Examples: .. code-block:: python - import paddle.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.sharding = True + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.sharding = True """ return self.strategy.sharding @@ -1435,17 +1442,17 @@ def sharding_configs(self): Examples: .. code-block:: python - # sharding-DP, 2 nodes with 8 gpus per node - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.sharding = True - strategy.sharding_configs = { - "sharding_segment_strategy": "segment_broadcast_MB", - "segment_broadcast_MB": 32, - "sharding_degree": 8, - "dp_degree": 2, - "gradient_merge_acc_step": 4, - } + >>> # sharding-DP, 2 nodes with 8 gpus per node + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.sharding = True + >>> strategy.sharding_configs = { + ... "sharding_segment_strategy": "segment_broadcast_MB", + ... "segment_broadcast_MB": 32, + ... "sharding_degree": 8, + ... "dp_degree": 2, + ... "gradient_merge_acc_step": 4, + ... } """ return get_msg_dict(self.strategy.sharding_configs) @@ -1467,9 +1474,9 @@ def without_graph_optimization(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.without_graph_optimization = True + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.without_graph_optimization = True """ return self.strategy.without_graph_optimization @@ -1495,9 +1502,9 @@ def _calc_comm_same_stream(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.calc_comm_same_stream = True + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy._calc_comm_same_stream = True """ return self.strategy.calc_comm_same_stream @@ -1523,9 +1530,9 @@ def fuse_grad_merge(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.fuse_param_grad = True + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.fuse_grad_merge = True """ return self.strategy.fuse_grad_merge @@ -1547,10 +1554,10 @@ def fuse_grad_size_in_num(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet + >>> import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.fuse_grad_size_in_num = 2 + >>> strategy = fleet.DistributedStrategy() + >>> strategy.fuse_grad_size_in_num = 2 """ return self.strategy.fuse_grad_size_in_num @@ -1577,9 +1584,9 @@ def pipeline(self): Examples: .. 
code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.pipeline = True + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.pipeline = True """ return self.strategy.pipeline @@ -1637,10 +1644,10 @@ def pipeline_configs(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.pipeline = True - strategy.pipeline_configs = {"micro_batch_size": 12} + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.pipeline = True + >>> strategy.pipeline_configs = {"micro_batch_size": 12} """ @@ -1663,9 +1670,9 @@ def tensor_parallel(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.tensor_parallel = True + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.tensor_parallel = True """ return self.strategy.tensor_parallel @@ -1695,11 +1702,11 @@ def tensor_parallel_configs(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.tensor_parallel = True - strategy.tensor_parallel_configs = {"tensor_parallel_degree": 4, - "tensor_init_seed": 123} + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.tensor_parallel = True + >>> strategy.tensor_parallel_configs = {"tensor_parallel_degree": 4, + ... "tensor_init_seed": 123} """ return get_msg_dict(self.strategy.tensor_parallel_configs) @@ -1739,13 +1746,14 @@ def hybrid_configs(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.hybrid_configs = { - "dp_degree": 1, - "mp_degree": 2, - "pp_degree": 1, - "order":['dp','pp','sharding', 'sep', 'mp']} + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.hybrid_configs = { + ... "dp_degree": 1, + ... "mp_degree": 2, + ... "pp_degree": 1, + ... "order":['dp','pp','sharding', 'sep', 'mp'] + ... } """ return get_msg_dict(self.strategy.hybrid_configs) @@ -1786,13 +1794,12 @@ def localsgd(self): For more details, please refer to `Don't Use Large Mini-Batches, Use Local SGD `_. - Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.localsgd = True # by default this is false + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.localsgd = True # by default this is false """ return self.strategy.localsgd @@ -1819,11 +1826,11 @@ def localsgd_configs(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.localsgd = True - strategy.localsgd_configs = {"k_steps": 4, - "begin_step": 30} + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.localsgd = True + >>> strategy.localsgd_configs = {"k_steps": 4, + ... "begin_step": 30} """ @@ -1848,9 +1855,9 @@ def adaptive_localsgd(self): Examples: .. 
code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.adaptive_localsgd = True # by default this is false + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.adaptive_localsgd = True # by default this is false """ return self.strategy.adaptive_localsgd @@ -1880,11 +1887,11 @@ def adaptive_localsgd_configs(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.adaptive_localsgd = True - strategy.adaptive_localsgd_configs = {"init_k_steps": 1, - "begin_step": 30} + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.adaptive_localsgd = True + >>> strategy.adaptive_localsgd_configs = {"init_k_steps": 1, + ... "begin_step": 30} """ @@ -1912,9 +1919,9 @@ def dgc(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.dgc = True # by default this is false + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.dgc = True # by default this is false """ return self.strategy.dgc @@ -1949,10 +1956,10 @@ def dgc_configs(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.dgc = True - strategy.dgc_configs = {"rampup_begin_step": 1252} + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.dgc = True + >>> strategy.dgc_configs = {"rampup_begin_step": 1252} """ return get_msg_dict(self.strategy.dgc_configs) @@ -1973,10 +1980,10 @@ def fp16_allreduce(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet + >>> import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.fp16_allreduce = True # by default this is false + >>> strategy = fleet.DistributedStrategy() + >>> strategy.fp16_allreduce = True # by default this is false """ return self.strategy.fp16_allreduce @@ -2004,10 +2011,10 @@ def gradient_merge(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.gradient_merge = True - strategy.gradient_merge_configs = {"k_steps": 4, "avg": True} + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.gradient_merge = True + >>> strategy.gradient_merge_configs = {"k_steps": 4, "avg": True} """ return self.strategy.gradient_merge @@ -2034,10 +2041,10 @@ def gradient_merge_configs(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.gradient_merge = True - strategy.gradient_merge_configs = {"k_steps": 4, "avg": True} + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.gradient_merge = True + >>> strategy.gradient_merge_configs = {"k_steps": 4, "avg": True} """ return get_msg_dict(self.strategy.gradient_merge_configs) @@ -2063,9 +2070,9 @@ def lars(self): Examples: .. 
code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.lars = True # by default this is false + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.lars = True # by default this is false """ return self.strategy.lars @@ -2095,15 +2102,15 @@ def lars_configs(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.lars = True - strategy.lars_configs = { - "lars_coeff": 0.01, - "lars_weight_decay": 0.0005, - "epsilon": 0, - "exclude_from_weight_decay": ['batch_norm', '.b_0'] - } + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.lars = True + >>> strategy.lars_configs = { + ... "lars_coeff": 0.01, + ... "lars_weight_decay": 0.0005, + ... "epsilon": 0, + ... "exclude_from_weight_decay": ['batch_norm', '.b_0'] + ... } """ return get_msg_dict(self.strategy.lars_configs) @@ -2128,9 +2135,9 @@ def lamb(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.lamb = True # by default this is false + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.lamb = True # by default this is false """ @@ -2158,13 +2165,13 @@ def lamb_configs(self): Examples: .. code-block:: python - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.lamb = True - strategy.lamb_configs = { - 'lamb_weight_decay': 0.01, - 'exclude_from_weight_decay': [], - } + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.lamb = True + >>> strategy.lamb_configs = { + ... 'lamb_weight_decay': 0.01, + ... 'exclude_from_weight_decay': [], + ... } """ return get_msg_dict(self.strategy.lamb_configs) @@ -2207,17 +2214,17 @@ def auto(self): Examples: .. code-block:: python - import paddle - paddle.enable_static() - import paddle.distributed.fleet as fleet + >>> import paddle + >>> paddle.enable_static() + >>> import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.auto = True - # if set other strategy at the same time, auto will not apply - # strategy.amp = True + >>> strategy = fleet.DistributedStrategy() + >>> strategy.auto = True + >>> # if set other strategy at the same time, auto will not apply + >>> # strategy.amp = True - optimizer = paddle.optimizer.SGD(learning_rate=0.01) - optimizer = fleet.distributed_optimizer(optimizer, strategy) + >>> optimizer = paddle.optimizer.SGD(learning_rate=0.01) + >>> optimizer = fleet.distributed_optimizer(optimizer, strategy) """ return self.strategy.auto @@ -2243,17 +2250,17 @@ def semi_auto(self): Examples: .. 
code-block:: python - import paddle - paddle.enable_static() - import paddle.distributed.fleet as fleet + >>> import paddle + >>> paddle.enable_static() + >>> import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.semi_auto = True - # if set other strategy at the same time, auto will not apply - # strategy.amp = True + >>> strategy = fleet.DistributedStrategy() + >>> strategy.semi_auto = True + >>> # if set other strategy at the same time, auto will not apply + >>> # strategy.amp = True - optimizer = paddle.optimizer.SGD(learning_rate=0.01) - optimizer = fleet.distributed_optimizer(optimizer, strategy) + >>> optimizer = paddle.optimizer.SGD(learning_rate=0.01) + >>> optimizer = fleet.distributed_optimizer(optimizer, strategy) """ return self.strategy.semi_auto @@ -2276,12 +2283,12 @@ def auto_search(self): Examples: .. code-block:: python - import paddle + >>> import paddle - paddle.enable_static() - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.auto_search = True + >>> paddle.enable_static() + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.auto_search = True """ return self.strategy.auto_search @@ -2303,12 +2310,12 @@ def split_data(self): Examples: .. code-block:: python - import paddle + >>> import paddle - paddle.enable_static() - import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.split_data = True + >>> paddle.enable_static() + >>> import paddle.distributed.fleet as fleet + >>> strategy = fleet.DistributedStrategy() + >>> strategy.split_data = True """ return self.strategy.split_data @@ -2359,15 +2366,16 @@ def qat_configs(self): Exampless: .. code-block:: python - import paddle.distributed.fleet as fleet + >>> import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.qat = True - strategy.qat_configs = { - "channel_wise_abs_max": True, - "weight_bits": 8, - "activation_bits: 8, - "not_quant_pattern": ['skip_quant']} + >>> strategy = fleet.DistributedStrategy() + >>> strategy.qat = True + >>> strategy.qat_configs = { + ... "channel_wise_abs_max": True, + ... "weight_bits": 8, + ... "activation_bits": 8, + ... "not_quant_pattern": ['skip_quant'] + ... } """ return get_msg_dict(self.strategy.qat_configs) @@ -2389,15 +2397,15 @@ def heter_ccl_mode(self): Examples: .. code-block:: python - import paddle - import paddle.distributed.fleet as fleet + >>> import paddle + >>> import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.heter_ccl_mode = True + >>> strategy = fleet.DistributedStrategy() + >>> strategy.heter_ccl_mode = True - # for initialize parallel env, only need to call - paddle.distributed.init_parallel_env() - # then the heterogenous context will be created. + >>> # for initialize parallel env, only need to call + >>> paddle.distributed.init_parallel_env() + >>> # then the heterogenous context will be created. """ return self.strategy.heter_ccl_mode @@ -2422,15 +2430,15 @@ def cudnn_exhaustive_search(self): Examples: .. 
code-block:: python - import paddle - paddle.enable_static() - import paddle.distributed.fleet as fleet + >>> import paddle + >>> paddle.enable_static() + >>> import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.cudnn_exhaustive_search = False + >>> strategy = fleet.DistributedStrategy() + >>> strategy.cudnn_exhaustive_search = False - optimizer = paddle.optimizer.SGD(learning_rate=0.01) - optimizer = fleet.distributed_optimizer(optimizer, strategy) + >>> optimizer = paddle.optimizer.SGD(learning_rate=0.01) + >>> optimizer = fleet.distributed_optimizer(optimizer, strategy) """ return self.strategy.cudnn_exhaustive_search @@ -2458,15 +2466,15 @@ def conv_workspace_size_limit(self): Examples: .. code-block:: python - import paddle - paddle.enable_static() - import paddle.distributed.fleet as fleet + >>> import paddle + >>> paddle.enable_static() + >>> import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.conv_workspace_size_limit = 1024 + >>> strategy = fleet.DistributedStrategy() + >>> strategy.conv_workspace_size_limit = 1024 - optimizer = paddle.optimizer.SGD(learning_rate=0.01) - optimizer = fleet.distributed_optimizer(optimizer, strategy) + >>> optimizer = paddle.optimizer.SGD(learning_rate=0.01) + >>> optimizer = fleet.distributed_optimizer(optimizer, strategy) """ return self.strategy.conv_workspace_size_limit @@ -2492,15 +2500,15 @@ def cudnn_batchnorm_spatial_persistent(self): Examples: .. code-block:: python - import paddle - paddle.enable_static() - import paddle.distributed.fleet as fleet + >>> import paddle + >>> paddle.enable_static() + >>> import paddle.distributed.fleet as fleet - strategy = fleet.DistributedStrategy() - strategy.cudnn_batchnorm_spatial_persistent = True + >>> strategy = fleet.DistributedStrategy() + >>> strategy.cudnn_batchnorm_spatial_persistent = True - optimizer = paddle.optimizer.SGD(learning_rate=0.01) - optimizer = fleet.distributed_optimizer(optimizer, strategy) + >>> optimizer = paddle.optimizer.SGD(learning_rate=0.01) + >>> optimizer = fleet.distributed_optimizer(optimizer, strategy) """ return self.strategy.cudnn_batchnorm_spatial_persistent diff --git a/python/paddle/distributed/fleet/base/role_maker.py b/python/paddle/distributed/fleet/base/role_maker.py index 7b9cf269dcd26..ddc6c411598c3 100755 --- a/python/paddle/distributed/fleet/base/role_maker.py +++ b/python/paddle/distributed/fleet/base/role_maker.py @@ -551,20 +551,20 @@ class PaddleCloudRoleMaker(RoleMakerBase): Examples: .. code-block:: python - import os - import paddle.distributed.fleet as fleet + >>> import os + >>> import paddle.distributed.fleet as fleet - os.environ["PADDLE_PSERVER_NUMS"] = "2" - os.environ["PADDLE_TRAINERS_NUM"] = "2" + >>> os.environ["PADDLE_PSERVER_NUMS"] = "2" + >>> os.environ["PADDLE_TRAINERS_NUM"] = "2" - os.environ["POD_IP"] = "127.0.0.1" - os.environ["PADDLE_PORT"] = "36001" - os.environ["TRAINING_ROLE"] = "PSERVER" - os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001,127.0.0.2:36001" + >>> os.environ["POD_IP"] = "127.0.0.1" + >>> os.environ["PADDLE_PORT"] = "36001" + >>> os.environ["TRAINING_ROLE"] = "PSERVER" + >>> os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001,127.0.0.2:36001" - os.environ["PADDLE_TRAINER_ID"] = "0" + >>> os.environ["PADDLE_TRAINER_ID"] = "0" - fleet.PaddleCloudRoleMaker(is_collective=False) + >>> fleet.PaddleCloudRoleMaker(is_collective=False) """ @@ -1211,14 +1211,14 @@ class UserDefinedRoleMaker(PaddleCloudRoleMaker): Examples: .. 
code-block:: python - import paddle.distributed.fleet as fleet - from paddle.distributed.fleet.base.role_maker import Role + >>> import paddle.distributed.fleet as fleet + >>> from paddle.distributed.fleet.base.role_maker import Role - fleet.UserDefinedRoleMaker( - current_id=0, - role=Role.SERVER, - worker_num=2, - server_endpoints=["127.0.0.1:36011", "127.0.0.1:36012"]) + >>> fleet.UserDefinedRoleMaker( + ... current_id=0, + ... role=Role.SERVER, + ... worker_num=2, + ... server_endpoints=["127.0.0.1:36011", "127.0.0.1:36012"]) """ def __init__(self, is_collective=False, init_gloo=False, **kwargs): From c0a4296b22f9378a9bf6c56889eb663b2b3b3f5e Mon Sep 17 00:00:00 2001 From: megemini Date: Sun, 24 Sep 2023 18:02:53 +0800 Subject: [PATCH 3/4] [Fix] fix load_from_prototxt --- python/paddle/distributed/fleet/base/distributed_strategy.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index d659c3ebf24f9..4750c6bca66fc 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -195,6 +195,11 @@ def load_from_prototxt(self, pb_file): >>> import paddle.distributed.fleet as fleet >>> strategy = fleet.DistributedStrategy() + >>> strategy.dgc = True + >>> strategy.recompute = True + >>> strategy.recompute_configs = {"checkpoints": ["x"]} + >>> strategy.save_to_prototxt("dist_strategy.prototxt") + >>> strategy.load_from_prototxt("dist_strategy.prototxt") """ From 8d1a61b7aa3377d1ab2bec806d0284c71af551e9 Mon Sep 17 00:00:00 2001 From: Nyakku Shigure Date: Sun, 24 Sep 2023 21:37:17 +0800 Subject: [PATCH 4/4] Apply suggestions from code review --- python/paddle/base/framework.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/python/paddle/base/framework.py b/python/paddle/base/framework.py index dd4eae82a7b60..d6ec848283fc8 100644 --- a/python/paddle/base/framework.py +++ b/python/paddle/base/framework.py @@ -2850,9 +2850,9 @@ class Operator: .. code-block:: python >>> import paddle + >>> paddle.enable_static() - >>> import paddle.base as base - >>> cur_program = base.Program() + >>> cur_program = paddle.static.Program() >>> cur_block = cur_program.current_block() >>> var1 = cur_block.create_var(name="var1", shape=[-1, 23, 48], dtype='float32') >>> var2 = cur_block.create_var(name="var2", shape=[-1, 23, 48], dtype='float32') @@ -3206,10 +3206,9 @@ def _to_readable_code(self, skip_op_callstack=True): .. code-block:: python >>> import paddle - >>> paddle.enable_static() - >>> import paddle.base as base - >>> cur_program = base.Program() + >>> paddle.enable_static() + >>> cur_program = paddle.static.Program() >>> cur_block = cur_program.current_block() >>> var = cur_block.create_var(name="X", ... shape=[-1, 23, 48], @@ -3939,10 +3938,9 @@ class Block: .. code-block:: python >>> import paddle - >>> paddle.enable_static() - >>> import paddle.base as base - >>> cur_program = base.Program() + >>> paddle.enable_static() + >>> cur_program = paddle.static.Program() >>> cur_block = cur_program.current_block() >>> var = cur_block.create_var(name="X", ... shape=[-1, 23, 48], @@ -3980,10 +3978,9 @@ def _to_readable_code(self, skip_op_callstack=True): .. 
code-block:: python >>> import paddle - >>> paddle.enable_static() - >>> import paddle.base as base - >>> cur_program = base.Program() + >>> paddle.enable_static() + >>> cur_program = paddle.static.Program() >>> cur_block = cur_program.current_block() >>> new_var = cur_block.create_var(name="X", ... shape=[-1, 23, 48], @@ -7293,9 +7290,8 @@ def to_string(self, throw_on_error, with_details=False): .. code-block:: python >>> import paddle - >>> import paddle.base as base >>> paddle.enable_static() - >>> prog = base.default_main_program() + >>> prog = paddle.static.default_main_program() >>> rlt = paddle.static.data("fake_data", shape=[-1,1,1], dtype='float32') >>> debug_str = prog.to_string(throw_on_error=True, with_details=False) >>> print(debug_str)
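All of the converted snippets above follow the same ">>> " prompt / expected-output layout that both the standard-library doctest runner and xdoctest understand, so an individual example can be sanity-checked locally before CI runs. A minimal sketch of such a check, using only the standard library and a placeholder dict instead of any Paddle call (the file name and sample values are illustrative and not part of this patch):

    # sanity_check_example.py -- reviewer-side sketch, not introduced by any hunk above.
    # The docstring below uses the same ">>> " prompt / expected-output layout as the
    # converted examples; a passing run is a quick format check without touching Paddle.
    import doctest


    def sample():
        """
        >>> cfg = {"k_steps": 4, "avg": True}
        >>> cfg["k_steps"]
        4
        >>> sorted(cfg)
        ['avg', 'k_steps']
        """


    if __name__ == "__main__":
        # testmod() collects every docstring in this module and compares the actual
        # output of each statement with the expected lines written underneath it.
        failures, attempted = doctest.testmod()
        print(f"{attempted - failures}/{attempted} examples passed")

Because the runner compares printed output with the expected lines essentially verbatim, each expected line under a print(...) call in the hunks above has to match what the statement actually emits (up to the usual doctest whitespace handling).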