
[hybrid] seed and dropout op support force-cpu #35820

Merged (11 commits) on Sep 28, 2021
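In brief, as the diffs below show: the seed op gains a force_cpu attribute so its output can live in host memory, the dropout op learns to read a CPU-resident Seed tensor directly instead of first transforming it to the GPU, and the recompute pass in backward.py sets force_cpu=True on the seed ops it inserts, removing a device-to-host synchronization from the dropout path.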
3 changes: 3 additions & 0 deletions paddle/fluid/operators/dropout_impl.cu.h
@@ -205,6 +205,9 @@ void DropoutFwGPUKernelDriver(const platform::CUDADeviceContext& dev_ctx,
       TensorCopySync(*seed, platform::CPUPlace(), &seed_cpu_tensor);
       seed_data = static_cast<uint64_t>(seed_cpu_tensor.data<int>()[0]);
       increment = offset;
+    } else if (seed && platform::is_cpu_place(seed->place())) {
+      seed_data = *(seed->data<int>());
+      increment = offset;
     } else if (gen_cuda->GetIsInitPy() && (!is_fix_seed)) {
       auto seed_offset = gen_cuda->IncrementOffset(offset);
       seed_data = seed_offset.first;
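To make the control flow above easier to follow out of context, here is a minimal standalone sketch of the seed-resolution precedence; every type and name in it is an illustrative stand-in, not a Paddle API:

#include <cstdint>
#include <random>

// Stand-ins for tensor placement; not Paddle types.
enum class Place { kCPU, kGPU };
struct SeedTensor {
  Place place;
  int value;
};

// Mirrors the branch order in DropoutFwGPUKernelDriver above:
// a GPU-resident seed tensor must be synced to the host first; a
// CPU-resident one (the new path) is read directly; otherwise fall
// back to the generator or the fixed seed.
uint64_t ResolveSeed(const SeedTensor* seed, bool is_fix_seed,
                     uint64_t fixed_seed) {
  if (seed != nullptr && seed->place == Place::kGPU) {
    int host_value = seed->value;  // stands in for TensorCopySync to CPU
    return static_cast<uint64_t>(host_value);
  }
  if (seed != nullptr && seed->place == Place::kCPU) {
    return static_cast<uint64_t>(seed->value);  // direct host read, no sync
  }
  if (!is_fix_seed) {
    return std::random_device{}();  // stands in for the CUDA generator path
  }
  return fixed_seed;
}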
12 changes: 12 additions & 0 deletions paddle/fluid/operators/dropout_op.cc
@@ -42,6 +42,18 @@ class DropoutOp : public framework::OperatorWithKernel {
     return framework::OpKernelType(
         OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
   }
+
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string& var_name, const Tensor& tensor,
+      const framework::OpKernelType& expected_kernel_type) const override {
+    if (var_name == "Seed" && platform::is_cpu_place(tensor.place())) {
Contributor: Is the is_cpu_place check still needed here?

Contributor (Author): Fixed.

+      VLOG(10) << "var_name:" << var_name << " need not to transform";
Contributor: Suggest naming the op in the VLOG message, e.g. "does not need to transform in dropout op".

Contributor (Author): Fixed.

+      return expected_kernel_type;
+    }
+
+    return framework::OpKernelType(expected_kernel_type.data_type_,
+                                   tensor.place(), tensor.layout());
+  }
 };

 class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
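Why this override matters: by default the framework transforms (copies) each input tensor to the place implied by the op's expected kernel type, so a CPU-resident Seed would be shipped to the GPU only for the dropout kernel to read it back. Returning expected_kernel_type unchanged reports the variable as already matching, and the transform is skipped. A toy model of that decision, with stand-in types rather than Paddle's (a sketch, not the framework code):

#include <iostream>

// Stand-ins for the framework's kernel-type machinery; not Paddle types.
enum class Place { kCPU, kGPU };
struct KernelType { Place place; };

// The framework copies an input only when the type reported for the
// variable differs from the kernel's expected type.
bool NeedTransform(const KernelType& var_type, const KernelType& expected) {
  return var_type.place != expected.place;
}

// Shape of the override above: for a CPU-resident Seed, claim the expected
// type so no transform fires; otherwise report the tensor's actual place.
KernelType KernelTypeForSeed(bool seed_on_cpu, const KernelType& expected) {
  if (seed_on_cpu) return expected;
  return KernelType{Place::kCPU};
}

int main() {
  KernelType expected{Place::kGPU};
  std::cout << NeedTransform(KernelTypeForSeed(true, expected), expected)
            << "\n";  // 0: CPU seed stays put, no host-to-device copy
  std::cout << NeedTransform(KernelTypeForSeed(false, expected), expected)
            << "\n";  // 1: the seed would be transformed to the GPU
}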
5 changes: 5 additions & 0 deletions paddle/fluid/operators/seed_op.cc
@@ -39,6 +39,11 @@ class SeedOpMaker : public framework::OpProtoAndCheckerMaker {
   void Make() override {
     AddOutput("Out", "The output of seed op.");
     AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
+    AddAttr<bool>("force_cpu",
Contributor: Check whether inference uses this op; AddCheckpoint may be needed to keep inference compatible.

Contributor (Author): AddCheckpoint has been added.

Contributor: If inference does not need the attribute, shouldn't AsExtra() also be added, per the new convention?

Contributor (Author): Confirmed; AsExtra() has been added.

"(bool, default false) Force fill output variable to cpu "
"memory. Otherwise, fill output variable to the running "
"device")
.SetDefault(false);
AddComment(R"DOC(
Seed Operator.
)DOC");
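For reference, the compatibility registration the reviewers asked for plausibly follows Paddle's usual REGISTER_OP_VERSION pattern; the exact description string and attribute remark in the merged commit may differ from this sketch:

// Sketch of the op-version checkpoint for the new attribute (wording
// illustrative); registered alongside the op so older inference programs
// stay loadable.
REGISTER_OP_VERSION(seed).AddCheckpoint(
    R"ROC(Upgrade seed, add a new attribute [force_cpu].)ROC",
    paddle::framework::compatible::OpVersionDesc().NewAttr(
        "force_cpu",
        "If true, fill the output variable on CPU memory rather than on "
        "the running device.",
        false));

Per the second thread, the attribute declaration itself would also chain .AsExtra() after .SetDefault(false) to mark it as unneeded by inference.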
32 changes: 24 additions & 8 deletions paddle/fluid/operators/seed_op.cu
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/seed_op.h"

 namespace paddle {
@@ -20,22 +21,37 @@ namespace operators {
 template <typename Place, typename T>
 class GPUSeedKernel : public framework::OpKernel<T> {
  public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto* out = context.Output<Tensor>("Out");
-    auto* out_data = out->mutable_data<T>(context.GetPlace());
+  void Compute(const framework::ExecutionContext &context) const override {
+    auto *out = context.Output<Tensor>("Out");
     int user_seed = context.Attr<int>("seed");
+    auto force_cpu = context.Attr<bool>("force_cpu");
     std::random_device rnd;
     int seed;
     if (user_seed != 0) {
       seed = user_seed;
     } else {
       seed = rnd();
     }
-    auto target_gpu_place =
-        BOOST_GET_CONST(platform::CUDAPlace, context.GetPlace());
-    auto stream = context.cuda_device_context().stream();
-    memory::Copy(target_gpu_place, out_data, platform::CPUPlace(), &seed,
-                 sizeof(int), stream);
+
+    bool cpu_place = force_cpu || context.GetPlace() == platform::CPUPlace();
+    if (cpu_place) {
+      platform::DeviceContextPool &pool =
+          platform::DeviceContextPool::Instance();
+      auto &dev_ctx = *pool.Get(context.GetPlace());
+      out->mutable_data<T>(platform::CPUPlace(),
+                           framework::proto::VarType::SIZE_T);
Contributor: What is this SIZE_T for?

Contributor (Author): Removed.
+      // out_data[0] = seed;
+      math::SetConstant<platform::CPUDeviceContext, T> functor;
+      functor(reinterpret_cast<const platform::CPUDeviceContext &>(dev_ctx),
+              out, static_cast<T>(seed));
+    } else {
+      auto *out_data = out->mutable_data<T>(context.GetPlace());
+      auto target_gpu_place =
+          BOOST_GET_CONST(platform::CUDAPlace, context.GetPlace());
+      auto stream = context.cuda_device_context().stream();
+      memory::Copy(target_gpu_place, out_data, platform::CPUPlace(), &seed,
+                   sizeof(int), stream);
+    }
   }
 };

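Note the division of labor in the kernel above: the seed value is always produced on the host (user-provided or std::random_device), so force_cpu never changes how randomness is generated; it only decides whether the value is written in place via math::SetConstant on a CPU-allocated output or copied into a GPU-allocated one with memory::Copy.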
7 changes: 5 additions & 2 deletions python/paddle/fluid/backward.py
@@ -202,8 +202,11 @@ def modify_forward_desc_for_recompute(self):
                 type='seed',
                 inputs={},
                 outputs={'Out': [added_var]},
-                attrs={'seed': seed,
-                       'op_device': op_device})
+                attrs={
+                    'seed': seed,
+                    'op_device': op_device,
+                    'force_cpu': True
Contributor: Add a short comment explaining why this is set to True.

Contributor (Author): Comment added.
+                })
             self.ops.insert(op_idx, added_op)
             # modify dropout op desc so that it accept a seed var as input
             op.desc.set_input("Seed", [var_unique_name])
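The comment the reviewer requested boils down to this: the seed variable created here feeds the dropout op's Seed input, and pinning it to the CPU with force_cpu=True lets dropout's GetKernelTypeForVar override (above) skip the device transform, so the dropout CUDA kernel reads the seed without a device-to-host synchronization on every recompute step.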
66 changes: 66 additions & 0 deletions python/paddle/fluid/tests/unittests/test_dropout_op.py
@@ -232,6 +232,72 @@ def init_test_case(self):
         self.fix_seed = False

+
+class TestDropoutOpWithSeedOnCPUPlace(unittest.TestCase):
+    def test_seed_cpu_place(self):
+        paddle.enable_static()
+        main_program = Program()
+        with program_guard(main_program):
+            seed_input_name = "tensor@SeedInput"
+            seed_out_name = "tensor@SeedOut"
+            x_var_name = "tensor@X"
+            mask_var_name = "tensor@Mask"
+            seed_input_var = main_program.global_block().create_var(
+                name=seed_input_name,
+                shape=[1],
+                dtype='int32',
+                persistable=False,
+                stop_gradient=True)
+            seed_out_var = main_program.global_block().create_var(
+                name=seed_out_name,
+                shape=[1],
+                dtype='int32',
+                persistable=False,
+                stop_gradient=True)
+            x_var = main_program.global_block().create_var(
+                name=x_var_name,
+                shape=[1],
+                dtype='float32',
+                persistable=False,
+                stop_gradient=True)
+            mask_var = main_program.global_block().create_var(
+                name=mask_var_name,
+                shape=[1],
+                dtype='int',
+                persistable=False,
+                stop_gradient=True)
+
+            main_program.global_block().append_op(
+                type="fill_constant",
+                outputs={"Out": x_var_name},
+                attrs={
+                    "shape": [1],
+                    "dtype": x_var.dtype,
+                    "value": 1.0,
+                    "place_type": 0
+                })
+            main_program.global_block().append_op(
+                type='seed',
+                inputs={},
+                outputs={'Out': seed_input_var},
+                attrs={'seed': 1,
+                       'force_cpu': True})
+            main_program.global_block().append_op(
+                type='dropout',
+                inputs={'X': x_var,
+                        'Seed': seed_input_var},
+                attrs={'dropout_prob': 1.0},
+                outputs={'Out': seed_out_var,
+                         'Mask': mask_var})
+            place = fluid.CUDAPlace(0)
+            exe = fluid.Executor(place)
+            x_out, mask_out = exe.run(
+                main_program,
+                feed={},
+                fetch_list=[seed_out_var.name, mask_var.name])
+            expect_value = np.array([0.0]).astype('float32')
+            self.assertTrue(np.array_equal(x_out, expect_value))
+

 class TestDropoutOpError(unittest.TestCase):
     def test_errors(self):
         with program_guard(Program(), Program()):