Distributed Automatic SParsity with Fleet #33558

Merged 25 commits on Jul 8, 2021

Commits (25)
ea668b2
Automatic SParsity Utilities
mingxu1067 May 19, 2021
3725920
Automatic SParsity Helper
mingxu1067 May 19, 2021
24ea128
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
mingxu1067 May 19, 2021
c0fc73b
Automatic SParsity Helper
mingxu1067 May 21, 2021
5e4ab93
ASP Fleet MetaOptimizer
mingxu1067 May 21, 2021
67e8a52
Merged from PaddlePaddle/Paddle develop
mingxu1067 Jun 14, 2021
fb7c5d2
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
mingxu1067 Jun 15, 2021
30e3718
Removed test_fleet_with_asp from normal tests
mingxu1067 Jun 16, 2021
acd4ba1
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
mingxu1067 Jun 16, 2021
b6f90d4
Split test_fleet_with_asp into two tests to fit the time limit
mingxu1067 Jun 16, 2021
c3693ac
Merged Paddle/develop 58e465aa472799
mingxu1067 Jun 25, 2021
1966f82
Added enable_static() to example code.
mingxu1067 Jun 25, 2021
0ef2f7a
Slightly modified example code.
mingxu1067 Jun 25, 2021
dad3d13
Set longer timeout for fleet_with_asp unit tests
mingxu1067 Jun 25, 2021
7990a46
Removed set timeout of fleet_with_asp tests due to approval required.
mingxu1067 Jun 25, 2021
5fec8a9
Slightly changed example code in ASP.
mingxu1067 Jun 26, 2021
46053d4
Added cuda_compiled checking in asp example code.
mingxu1067 Jun 28, 2021
6bc3628
Fix code example in asp.py and reduce net in asp unittest
mingxu1067 Jun 28, 2021
f097699
Fix code example in asp.py
mingxu1067 Jun 28, 2021
9bb7e38
Fix fleet_with_asp test failed
mingxu1067 Jun 30, 2021
efe8885
Fix fleet_with_asp test failed
mingxu1067 Jun 30, 2021
fffab5f
Set GPU devices for fleet with asp tests
mingxu1067 Jun 30, 2021
ff66b0b
Setup ip and port for fleet with asp tests
mingxu1067 Jun 30, 2021
c642a66
Modified ASP meta_optimizer and related tests
mingxu1067 Jul 6, 2021
0a05dda
Added ENDPOINT in unittests
mingxu1067 Jul 6, 2021
2 changes: 2 additions & 0 deletions paddle/fluid/framework/distributed_strategy.proto
@@ -1,4 +1,5 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -178,6 +179,7 @@ message DistributedStrategy {
optional bool without_graph_optimization = 30 [ default = false ];
optional int32 fuse_grad_size_in_num = 31 [ default = 1 ];
optional bool calc_comm_same_stream = 32 [ default = false ];
optional bool asp = 33 [ default = false ];

optional RecomputeConfig recompute_configs = 101;
optional AMPConfig amp_configs = 102;
28 changes: 27 additions & 1 deletion python/paddle/distributed/fleet/base/distributed_strategy.py
@@ -1,4 +1,5 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -424,6 +425,31 @@ def amp_configs(self, configs):
check_configs_key(self.strategy.amp_configs, configs, "amp_configs")
assign_configs_value(self.strategy.amp_configs, configs)

@property
def asp(self):
"""
Indicates whether automatic sparsity (ASP) training is enabled.
Default Value: False

Examples:

.. code-block:: python

import paddle.distributed.fleet as fleet
strategy = fleet.DistributedStrategy()
strategy.asp = True # by default this is false

"""
return self.strategy.asp

@asp.setter
@is_strict_auto
def asp(self, flag):
if isinstance(flag, bool):
self.strategy.asp = flag
else:
print("WARNING: asp should have value of bool type")

@property
def recompute(self):
"""
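Taken together with the ASP meta-optimizer added later in this PR, the new flag gives a one-line switch for sparse training. Below is a minimal end-to-end sketch of how `strategy.asp` is meant to be used, assembled from the docstring above and the unit test added in this PR; the tiny network, the SGD settings, and the single-trainer collective setup are illustrative assumptions, not part of the change itself:

import paddle
import paddle.distributed.fleet as fleet
import paddle.fluid as fluid

paddle.enable_static()
fleet.init(is_collective=True)  # assumes the PADDLE_* endpoint env vars are set, as in the test below

train_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(train_prog, startup_prog):
    x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32')
    y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64')
    fc_1 = fluid.layers.fc(input=x, size=64, act='tanh')
    prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
    cost = fluid.layers.cross_entropy(input=prediction, label=y)
    avg_cost = paddle.mean(x=cost)

    strategy = fleet.DistributedStrategy()
    strategy.asp = True  # routes minimize() through ASPOptimizer

    optimizer = fluid.optimizer.SGD(learning_rate=0.01)
    optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
    optimizer.minimize(avg_cost)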
4 changes: 3 additions & 1 deletion python/paddle/distributed/fleet/meta_optimizers/__init__.py
@@ -1,4 +1,5 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from .amp_optimizer import AMPOptimizer
from .asp_optimizer import ASPOptimizer
from .recompute_optimizer import RecomputeOptimizer
from .gradient_merge_optimizer import GradientMergeOptimizer
from .graph_execution_optimizer import GraphExecutionOptimizer
66 changes: 66 additions & 0 deletions python/paddle/distributed/fleet/meta_optimizers/asp_optimizer.py
@@ -0,0 +1,66 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.fluid.contrib.sparsity.asp import ASPHelper
from .meta_optimizer_base import MetaOptimizerBase

__all__ = []


class ASPOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(ASPOptimizer, self).__init__(optimizer)
self.inner_opt = optimizer
# we do not allow meta optimizer to be inner optimizer currently
self.meta_optimizers_white_list = [
"AMPOptimizer", "LarsOptimizer", "LambOptimizer",
"GraphExecutionOptimizer", "RecomputeOptimizer",
"GradientMergeOptimizer"
]
self.meta_optimizers_black_list = []

def _set_basic_info(self, loss, role_maker, user_defined_optimizer,
user_defined_strategy):
super(ASPOptimizer, self)._set_basic_info(
loss, role_maker, user_defined_optimizer, user_defined_strategy)

def _can_apply(self):
if not self.role_maker._is_collective:
return False

if self.user_defined_strategy.asp:
return True

return False

def _disable_strategy(self, dist_strategy):

Review comment: set dist_strategy.asp = False ?

dist_strategy.asp = False

def _enable_strategy(self, dist_strategy, context):
dist_strategy.asp = True

def minimize_impl(self,
loss,
startup_program=None,
parameter_list=None,
no_grad_set=None):

optimize_ops, params_grads = ASPHelper._minimize(
self.inner_opt,
loss,
startup_program=startup_program,
parameter_list=parameter_list,
no_grad_set=no_grad_set)

return optimize_ops, params_grads
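For single-process training, the PR's docstrings point to `sparsity.decorate()` as the equivalent entry point to the masking that `minimize_impl` wires up through `ASPHelper._minimize`. A minimal non-Fleet sketch, condensed from the `decorate()` example shown below (the one-layer network and SGD settings are illustrative):

import paddle
import paddle.fluid as fluid
from paddle.fluid.contrib import sparsity

paddle.enable_static()
main_program, startup_program = fluid.Program(), fluid.Program()
with fluid.program_guard(main_program, startup_program):
    data = fluid.layers.data(name='data', shape=[None, 128])
    label = fluid.layers.data(name='label', shape=[None, 10])
    prob = fluid.layers.fc(input=data, num_flatten_dims=-1, size=10, act=None)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(prob, label))

    # decorate() wraps minimize() so the ASP mask variables are created and maintained
    optimizer = sparsity.decorate(fluid.optimizer.SGD(learning_rate=0.1))
    optimizer.minimize(loss, startup_program)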
40 changes: 27 additions & 13 deletions python/paddle/fluid/contrib/sparsity/asp.py
@@ -64,12 +64,15 @@ def decorate(optimizer):
Examples:
.. code-block:: python

import paddle
import paddle.fluid as fluid
from paddle.fluid.contrib import sparsity

main_program = fluid.Program()
startup_program = fluid.Program()

paddle.enable_static()

with fluid.program_guard(main_program, startup_program):
input_data = fluid.layers.data(name='data', shape=[None, 128])
label = fluid.layers.data(name='label', shape=[None, 10])
Expand All @@ -78,17 +81,13 @@ def decorate(optimizer):
loss = fluid.layers.mean(fluid.layers.square_error_cost(prob, label))

optimizer = fluid.optimizer.SGD(learning_rate=0.1)

optimizer = sparsity.decorate(optimizer)
# If you do sparse training with Fleet, replace the decorate call above with:
# strategy = paddle.distributed.fleet.DistributedStrategy()
# strategy.asp = True
# optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
optimizer.minimize(loss, startup_program)
"""
return ASPHelper.decorate(optimizer)

@@ -126,23 +125,38 @@ def prune_model(place,
Examples:
.. code-block:: python

import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.contrib import sparsity

paddle.enable_static()

main_program = fluid.Program()
startup_program = fluid.Program()

place = paddle.CPUPlace()
if core.is_compiled_with_cuda():
    place = paddle.CUDAPlace(0)

Contributor: There are a few too many blank lines added here.

Collaborator (author): OK, does the distributed side have any comments? I'll fix everything together and commit again.

with fluid.program_guard(main_program, startup_program):
input_data = fluid.layers.data(name='data', shape=[None, 128])
label = fluid.layers.data(name='label', shape=[None, 10])
hidden = fluid.layers.fc(input=input_data, num_flatten_dims=-1, size=32, act=None, name="need_sparse")
hidden = fluid.layers.fc(input=hidden, num_flatten_dims=-1, size=32, act=None, name="need_dense")
prob = fluid.layers.fc(input=hidden, num_flatten_dims=-1, size=10, act=None)
loss = fluid.layers.mean(fluid.layers.square_error_cost(prob, label))

# Set up layers excluded from the ASP workflow.
# Please note, excluded_layers must be set before calling `optimizer.minimize()`.
sparsity.set_excluded_layers(main_program, ["need_dense"])

optimizer = fluid.optimizer.SGD(learning_rate=0.1)
optimizer = fluid.contrib.mixed_precision.decorator.decorate(optimizer)
# Calling sparsity.decorate() to wrap minimize() in optimizer, which
# will insert necessary masking operations for ASP workflow.
optimizer = sparsity.decorate(optimizer)
optimizer.minimize(loss, startup_program)

exe = fluid.Executor(place)
exe.run(startup_program)
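As the unit test below exercises it, `sparsity.prune_model(place, train_prog)` rewrites supported parameters so that `sparsity.check_sparsity(mat.T, n=2, m=4)` holds, i.e. at most 2 nonzeros in every group of 4 consecutive elements. A NumPy-only sketch of that 2:4 pattern for readers unfamiliar with it; this is an illustration, not Paddle's implementation:

import numpy as np

def prune_2_of_4(mat):
    # Zero the two smallest-magnitude entries in every group of 4 along each row.
    out = mat.copy()
    groups = out.reshape(-1, 4)
    smallest = np.argsort(np.abs(groups), axis=1)[:, :2]  # two smallest per group
    np.put_along_axis(groups, smallest, 0.0, axis=1)
    return out

def check_2_of_4(mat):
    # Each group of 4 consecutive row elements may keep at most 2 nonzeros.
    return bool((np.count_nonzero(mat.reshape(-1, 4), axis=1) <= 2).all())

w = np.random.randn(32, 64).astype(np.float32)  # row length must be divisible by 4
assert check_2_of_4(prune_2_of_4(w))

Note that the test applies the check to `mat.T`, so the constraint there holds along the other dimension of the parameter tensor.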
8 changes: 8 additions & 0 deletions python/paddle/fluid/tests/unittests/asp/CMakeLists.txt
@@ -1,6 +1,14 @@
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")

list(REMOVE_ITEM TEST_OPS "test_fleet_with_asp")
list(REMOVE_ITEM TEST_OPS "test_fleet_with_asp_amp")

foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP})
endforeach(TEST_OP)

if(WITH_DISTRIBUTE)
py_test_modules(test_fleet_with_asp MODULES test_fleet_with_asp ENVS ${dist_ENVS})
py_test_modules(test_fleet_with_asp_amp MODULES test_fleet_with_asp_amp ENVS ${dist_ENVS})
endif()
89 changes: 89 additions & 0 deletions python/paddle/fluid/tests/unittests/asp/test_fleet_with_asp.py
@@ -0,0 +1,89 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.distributed.fleet as fleet
import paddle.distributed.fleet.base.role_maker as role_maker
import unittest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import os
from paddle.fluid.contrib import sparsity
from paddle.fluid.contrib.sparsity.asp import ASPHelper
import numpy as np
cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES')
if cuda_visible_devices is None or cuda_visible_devices == "":
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
else:
os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices.split(',')[0]

paddle.enable_static()


class TestFleetWithASP(unittest.TestCase):
def setUp(self):
os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36213"
os.environ["PADDLE_CURRENT_ENDPOINTS"] = "127.0.0.1:36213"
os.environ["PADDLE_TRAINERS_NUM"] = "1"
os.environ["PADDLE_TRAINER_ID"] = "0"

def net(self, main_prog, startup_prog):
with fluid.program_guard(main_prog, startup_prog):
input_x = paddle.static.data(
name="x", shape=[-1, 32], dtype='float32')
input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64')

fc_1 = fluid.layers.fc(input=input_x, size=64, act='tanh')
prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=input_y)
avg_cost = paddle.mean(x=cost)

strategy = paddle.distributed.fleet.DistributedStrategy()
strategy.asp = True
return avg_cost, strategy, input_x, input_y

def test_with_asp(self):
fleet.init(is_collective=True)
train_prog, startup_prog = fluid.Program(), fluid.Program()
avg_cost, strategy, input_x, input_y = self.net(train_prog,
startup_prog)

with fluid.program_guard(train_prog, startup_prog):
optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01)
optimizer = fleet.distributed_optimizer(
optimizer, strategy=strategy)
optimizer.minimize(avg_cost)

place = fluid.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda(
) else fluid.CPUPlace()

exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[input_x, input_y], place=place)
exe.run(startup_prog)

sparsity.prune_model(place, train_prog)

data = (np.random.randn(64, 32), np.random.randint(2, size=(64, 1)))
exe.run(train_prog, feed=feeder.feed([data]))

for param in train_prog.global_block().all_parameters():
if ASPHelper._is_supported_layer(train_prog, param.name):
mat = np.array(fluid.global_scope().find_var(param.name)
.get_tensor())
self.assertTrue(sparsity.check_sparsity(mat.T, n=2, m=4))


if __name__ == "__main__":
unittest.main()