Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PHI decoupling] move strided_memcpy.h to phi #50346

Merged
merged 6 commits into from
Feb 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions paddle/fluid/imperative/reducer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/parallel_context.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/phi/kernels/funcs/strided_memcpy.h"
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/platform/device/xpu/enforce_xpu.h"
#endif
Expand Down Expand Up @@ -103,7 +103,7 @@ static void SplitTensorsForAllReduce(
}
// Sometimes direct copies will be faster
if (p_dense_tensors->size() < 10) {
operators::StridedMemcpyWithAxis0<T>(context, *in, shape_refer, &outs);
phi::funcs::StridedMemcpyWithAxis0<T>(context, *in, shape_refer, &outs);
} else {
operators::math::SplitFunctor<DeviceContext, T> split_functor_;
split_functor_(context, *in, shape_refer, 0, &outs);
Expand Down
1 change: 0 additions & 1 deletion paddle/fluid/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ cc_test(gather_test SRCS gather_test.cc DEPS tensor)
cc_test(assign_op_test SRCS assign_op_test.cc DEPS assign_op)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor math_function)
cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor)
cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor memory)
cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op)
cc_test(save_load_combine_op_test SRCS save_load_combine_op_test.cc DEPS save_combine_op load_combine_op)
if (WITH_GPU)
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/concat_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ limitations under the License. */

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/phi/kernels/concat_kernel.h"
#include "paddle/phi/kernels/funcs/concat_funcs.h"
#include "paddle/phi/kernels/funcs/strided_memcpy.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/crop_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/phi/kernels/funcs/strided_memcpy.h"

namespace paddle {
namespace operators { // Internal
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ namespace cub = hipcub;
#include "paddle/fluid/operators/detection/bbox_util.h"
#include "paddle/fluid/operators/detection/collect_fpn_proposals_op.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h"
#include "paddle/phi/core/mixed_vector.h"
#include "paddle/phi/kernels/funcs/gather.cu.h"
#include "paddle/phi/kernels/funcs/strided_memcpy.h"

namespace paddle {
namespace operators {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/partial_concat_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/phi/kernels/funcs/strided_memcpy.h"

namespace paddle {
namespace operators {
Expand Down
26 changes: 13 additions & 13 deletions paddle/fluid/operators/sequence_ops/sequence_slice_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ limitations under the License. */

#pragma once
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/strided_memcpy.h"

namespace paddle {
namespace operators {
Expand Down Expand Up @@ -140,12 +140,12 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
static_cast<int>(lod[0][i] + offset_data[i]),
static_cast<int>(lod[0][i] + offset_data[i] + length_data[i]));

StridedMemcpy<T>(ctx.device_context(),
in_t.data<T>(),
in_stride,
in_t.dims(),
out_stride,
out->data<T>() + out_offset);
phi::funcs::StridedMemcpy<T>(ctx.device_context(),
in_t.data<T>(),
in_stride,
in_t.dims(),
out_stride,
out->data<T>() + out_offset);
out_offset += length_data[i] * in_stride[0];
}
}
Expand Down Expand Up @@ -201,12 +201,12 @@ class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
static_cast<int>(lod[0][i] + offset_data[i]),
static_cast<int>(lod[0][i] + offset_data[i] + length_data[i]));

StridedMemcpy<T>(ctx.device_context(),
out_grad_t.data<T>(),
out_grad_stride,
out_grad_t.dims(),
x_grad_stride,
x_grad_t.data<T>());
phi::funcs::StridedMemcpy<T>(ctx.device_context(),
out_grad_t.data<T>(),
out_grad_stride,
out_grad_t.dims(),
x_grad_stride,
x_grad_t.data<T>());
}
}
}
Expand Down
41 changes: 22 additions & 19 deletions paddle/fluid/operators/spp_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ limitations under the License. */

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/phi_utils.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/pooling.h"
#include "paddle/phi/kernels/funcs/strided_memcpy.h"

namespace paddle {
namespace operators {
Expand Down Expand Up @@ -96,12 +96,13 @@ class SppKernel : public framework::OpKernel<T> {
out_level.Resize(output_flatten_shape);
// concat
auto out_level_stride = phi::stride(out_level.dims());
StridedMemcpy<T>(context.template device_context<DeviceContext>(),
out_level.data<T>(),
out_level_stride,
out_level.dims(),
out_stride,
out->data<T>() + output_offset);
phi::funcs::StridedMemcpy<T>(
context.template device_context<DeviceContext>(),
out_level.data<T>(),
out_level_stride,
out_level.dims(),
out_stride,
out->data<T>() + output_offset);
output_offset += out_level.dims()[1] * out_level_stride[1];
}
}
Expand Down Expand Up @@ -150,19 +151,21 @@ class SppGradKernel : public framework::OpKernel<T> {
outgrad_level.mutable_data<T>(out_flatten_shape, context.GetPlace());
auto flatten_stride = phi::stride(out_level.dims());
// memcpy
StridedMemcpy<T>(context.template device_context<DeviceContext>(),
out->data<T>() + out_offset,
out_stride,
out_level.dims(),
flatten_stride,
out_level.data<T>());
phi::funcs::StridedMemcpy<T>(
context.template device_context<DeviceContext>(),
out->data<T>() + out_offset,
out_stride,
out_level.dims(),
flatten_stride,
out_level.data<T>());

StridedMemcpy<T>(context.template device_context<DeviceContext>(),
out_grad->data<T>() + out_offset,
out_stride,
outgrad_level.dims(),
flatten_stride,
outgrad_level.data<T>());
phi::funcs::StridedMemcpy<T>(
context.template device_context<DeviceContext>(),
out_grad->data<T>() + out_offset,
out_stride,
outgrad_level.dims(),
flatten_stride,
outgrad_level.data<T>());
out_offset += out_level.dims()[1] * out_stride[1];
// flatten backward to nchw

Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/unbind_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ limitations under the License. */

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/phi/kernels/funcs/strided_memcpy.h"

namespace paddle {
namespace operators {
Expand Down
17 changes: 8 additions & 9 deletions paddle/fluid/pybind/tensor_py.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ limitations under the License. */
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/phi/kernels/funcs/strided_memcpy.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif
Expand Down Expand Up @@ -724,14 +724,13 @@ void _concatCompute(const std::vector<phi::DenseTensor> &ins,
for (auto &in : ins) {
auto in_stride = phi::stride_numel(in.dims());
auto out_stride = phi::stride_numel(out->dims());
paddle::operators::StridedNumelCopyWithAxis<T>(
ctx,
axis,
out->data<T>() + output_offset,
out_stride,
in.data<T>(),
in_stride,
in_stride[axis]);
phi::funcs::StridedNumelCopyWithAxis<T>(ctx,
axis,
out->data<T>() + output_offset,
out_stride,
in.data<T>(),
in_stride,
in_stride[axis]);
output_offset += in_stride[axis];
}
} else {
Expand Down
17 changes: 8 additions & 9 deletions paddle/phi/kernels/cpu/concat_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

#include "paddle/phi/kernels/concat_kernel.h"

#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/complex.h"
Expand All @@ -24,6 +23,7 @@
#include "paddle/phi/core/lod_utils.h"
#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
#include "paddle/phi/kernels/funcs/concat_funcs.h"
#include "paddle/phi/kernels/funcs/strided_memcpy.h"

namespace phi {

Expand Down Expand Up @@ -86,14 +86,13 @@ void ConcatKernel(const Context& dev_ctx,
}
auto in_stride = phi::stride_numel(in->dims());
auto out_stride = phi::stride_numel(out->dims());
paddle::operators::StridedNumelCopyWithAxis<T>(
dev_ctx,
axis,
out->data<T>() + output_offset,
out_stride,
in->data<T>(),
in_stride,
in_stride[axis]);
phi::funcs::StridedNumelCopyWithAxis<T>(dev_ctx,
axis,
out->data<T>() + output_offset,
out_stride,
in->data<T>(),
in_stride,
in_stride[axis]);
output_offset += in_stride[axis];
}
} else {
Expand Down
Loading