add inplace api transpose_, t_, normal_, cauchy_, geometric_ (PaddlePaddle#57093)

* add inplace

* fix transpose inplace error

* fix error

* fix

* fix

* add gaussian inplace kernel

* change cauchy_ geometric_ impl

* fix typo

* add test

* remove gaussian test

* fix sample code error

* fix sample code

* fix sample code error
GGBond8488 authored Sep 22, 2023
1 parent 24128d7 commit 8248107
Showing 20 changed files with 894 additions and 2 deletions.
11 changes: 11 additions & 0 deletions paddle/phi/api/yaml/backward.yaml
@@ -939,6 +939,17 @@
composite : gather_nd_grad(x, index, out_grad, x_grad)
no_need_buffer : x

- backward_op : gaussian_inplace_grad
forward : gaussian_inplace(Tensor x, float mean=0, float std=1.0, int seed=0) -> Tensor(out)
args : (Tensor out_grad, float mean=0, float std=1.0, int seed=0)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [out_grad]
kernel :
func : gaussian_inplace_grad
inplace : (out_grad -> x_grad)
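Because the forward op overwrites its input with freshly drawn samples that do not depend on x, the registered gradient for x is identically zero. A minimal dygraph check of that contract (a sketch: it assumes normal_, added by this PR, is bound as a Tensor method and routes to gaussian_inplace):

    import paddle

    x = paddle.ones([2, 3])
    x.stop_gradient = False
    y = x * 2          # keep x in the autograd graph
    y.normal_()        # in-place random fill; the result ignores y's old values
    y.sum().backward()
    print(x.grad)      # expected: all zeros, per gaussian_inplace_grad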

- backward_op : gelu_grad
forward : gelu(Tensor x, bool approximate) -> Tensor(out)
args : (Tensor x, Tensor out_grad, bool approximate)
1 change: 1 addition & 0 deletions paddle/phi/api/yaml/generator/api_base.py
@@ -1223,6 +1223,7 @@ def gen_kernel_code(self, kernel_name, code_indent, inplace_flag=False):
"unsqueeze",
"reshape",
"flatten",
"transpose",
]:
i = 0
for kernel_out in outputs_args:
1 change: 1 addition & 0 deletions paddle/phi/api/yaml/legacy_ops.yaml
@@ -1065,6 +1065,7 @@
func : TransposeInferMeta
kernel :
func : transpose
inplace : (x -> out)
backward : transpose_grad
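The inplace : (x -> out) annotation lets the code generator emit a transpose_ variant that reuses x's buffer instead of allocating a new output. A rough usage sketch (assumes the Python binding added elsewhere in this PR):

    import paddle

    x = paddle.arange(6).reshape([2, 3])
    x.transpose_(perm=[1, 0])   # mutates x; no separate output tensor
    print(x.shape)              # [3, 2]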

- op : tril
13 changes: 13 additions & 0 deletions paddle/phi/api/yaml/ops.yaml
@@ -1052,6 +1052,19 @@
func : gather_tree
data_type : ids

- op : gaussian_inplace
args: (Tensor x, float mean=0, float std=1.0, int seed=0)
output: Tensor(out)
infer_meta:
func: UnchangedInferMeta
param: [x]
kernel:
func: gaussian_inplace
data_type: x
backend : x
inplace: (x -> out)
backward: gaussian_inplace_grad
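This op backs the new normal_ API: it refills an existing tensor with samples from N(mean, std^2) in place. A hedged sketch (the Python signature is assumed to mirror the yaml arguments above):

    import paddle

    x = paddle.zeros([2, 3])
    x.normal_(mean=0.0, std=1.0)   # overwrite x with N(0, 1) samples
    print(x)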

- op : gelu
args : (Tensor x, bool approximate = false)
output : Tensor(out)
41 changes: 41 additions & 0 deletions paddle/phi/kernels/cpu/gaussian_inplace_grad_kernel.cc
@@ -0,0 +1,41 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/kernels/gaussian_inplace_grad_kernel.h"

#include "paddle/phi/core/kernel_registry.h"

namespace phi {

template <typename T, typename Context>
void GaussianInplaceGradKernel(const Context& ctx,
const DenseTensor& out_grad UNUSED,
float mean UNUSED,
float std UNUSED,
int seed UNUSED,
DenseTensor* x_grad) {
if (x_grad) {
auto* data = ctx.template Alloc<T>(x_grad);
std::fill(data, data + x_grad->numel(), T(0));
}
}

} // namespace phi

PD_REGISTER_KERNEL(gaussian_inplace_grad,
CPU,
ALL_LAYOUT,
phi::GaussianInplaceGradKernel,
float,
double) {}
31 changes: 31 additions & 0 deletions paddle/phi/kernels/cpu/gaussian_kernel.cc
@@ -48,7 +48,38 @@ void GaussianKernel(const Context& dev_ctx,
}
}

template <typename T, typename Context>
void GaussianInplaceKernel(const Context& dev_ctx,
const DenseTensor& x,
float mean,
float std,
int seed,
DenseTensor* out) {
T* data = dev_ctx.template Alloc<T>(out);
std::normal_distribution<T> dist(mean, std);

int64_t size = out->numel();
std::shared_ptr<std::mt19937_64> engine;
if (seed) {
engine = std::make_shared<std::mt19937_64>();
engine->seed(seed);
} else {
engine = dev_ctx.GetGenerator()->GetCPUEngine();
}

for (int64_t i = 0; i < size; ++i) {
data[i] = dist(*engine);
}
}

} // namespace phi

PD_REGISTER_KERNEL(
gaussian, CPU, ALL_LAYOUT, phi::GaussianKernel, float, double) {}

PD_REGISTER_KERNEL(gaussian_inplace,
CPU,
ALL_LAYOUT,
phi::GaussianInplaceKernel,
float,
double) {}
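In the CPU kernel above, a nonzero seed constructs a fresh mt19937_64 seeded with it, while seed == 0 falls back to the framework's global CPU generator, so paddle.seed controls the default path. A quick reproducibility sketch under that reading:

    import paddle

    paddle.seed(42)
    a = paddle.zeros([3])
    a.normal_()
    paddle.seed(42)
    b = paddle.zeros([3])
    b.normal_()
    print(bool((a == b).all()))   # True: seed=0 defers to the global generator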
29 changes: 29 additions & 0 deletions paddle/phi/kernels/gaussian_inplace_grad_kernel.h
@@ -0,0 +1,29 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/phi/core/dense_tensor.h"

namespace phi {

template <typename T, typename Context>
void GaussianInplaceGradKernel(const Context& ctx,
const DenseTensor& out_grad,
float mean,
float std,
int seed,
DenseTensor* x_grad);

} // namespace phi
8 changes: 8 additions & 0 deletions paddle/phi/kernels/gaussian_kernel.h
@@ -29,4 +29,12 @@ void GaussianKernel(const Context& ctx,
DataType dtype,
DenseTensor* out);

template <typename T, typename Context>
void GaussianInplaceKernel(const Context& ctx,
const DenseTensor& x,
float mean,
float std,
int seed,
DenseTensor* out);

} // namespace phi
44 changes: 44 additions & 0 deletions paddle/phi/kernels/gpu/gaussian_inplace_grad_kernel.cu
@@ -0,0 +1,44 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/kernels/gaussian_inplace_grad_kernel.h"

#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/full_kernel.h"

namespace phi {

template <typename T, typename Context>
void GaussianInplaceGradKernel(const Context& ctx,
const DenseTensor& out_grad,
float mean,
float std,
int seed,
DenseTensor* x_grad) {
auto dims = vectorize(x_grad->dims());
float value = 0.0f;
phi::FullKernel<T>(ctx, dims, value, phi::DataType::UNDEFINED, x_grad);
}

} // namespace phi

PD_REGISTER_KERNEL(gaussian_inplace_grad,
GPU,
ALL_LAYOUT,
phi::GaussianInplaceGradKernel,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
32 changes: 32 additions & 0 deletions paddle/phi/kernels/gpu/gaussian_kernel.cu
@@ -76,6 +76,29 @@ void GaussianKernel(const Context& dev_ctx,
}
}

template <typename T, typename Context>
void GaussianInplaceKernel(const Context& dev_ctx,
const DenseTensor& x,
float mean,
float std,
int seed,
DenseTensor* out) {
dev_ctx.template Alloc<T>(out);
if (seed == 0) {
// use global Generator seed
using MT = typename phi::dtype::MPTypeTrait<T>::Type;
funcs::normal_distribution<MT> dist;
funcs::normal_transform<MT> trans(static_cast<MT>(mean),
static_cast<MT>(std));
funcs::distribution_and_transform<T>(dev_ctx, out, dist, trans);
} else {
// use OP seed
auto func =
GaussianGenerator<T>(static_cast<T>(mean), static_cast<T>(std), seed);
IndexKernel<T, GaussianGenerator<T>>(dev_ctx, out, func);
}
}

} // namespace phi

PD_REGISTER_KERNEL(gaussian,
@@ -86,3 +109,12 @@ PD_REGISTER_KERNEL(gaussian,
phi::dtype::bfloat16,
float,
double) {}

PD_REGISTER_KERNEL(gaussian_inplace,
GPU,
ALL_LAYOUT,
phi::GaussianInplaceKernel,
phi::dtype::float16,
phi::dtype::bfloat16,
float,
double) {}
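Note that the GPU registration also covers float16/bfloat16, which the CPU kernels above do not. A hedged check (assumes a CUDA build and a visible GPU):

    import paddle

    if paddle.is_compiled_with_cuda():
        h = paddle.zeros([4], dtype='float16')  # fp16 path exists only on GPU here
        h.normal_()
        print(h.dtype)   # paddle.float16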
2 changes: 0 additions & 2 deletions paddle/phi/kernels/stride/transpose_kernel.cc
@@ -33,11 +33,9 @@ void TransposeStridedKernel(const Context& ctx,

auto meta = out->meta();
auto in_stride = x.strides();
auto in_dims = x.dims();
meta.strides = in_stride;
for (int i = 0; i < static_cast<int>(formated_axis.size()); i++) {
meta.strides[i] = in_stride[formated_axis[i]];
meta.dims[i] = in_dims[formated_axis[i]];
}
meta.offset = x.offset();

10 changes: 10 additions & 0 deletions python/paddle/__init__.py
@@ -122,12 +122,16 @@
from .tensor.creation import tril_indices # noqa: F401
from .tensor.creation import triu_indices # noqa: F401
from .tensor.creation import polar # noqa: F401
from .tensor.creation import geometric_ # noqa: F401
from .tensor.creation import cauchy_ # noqa: F401
from .tensor.linalg import matmul # noqa: F401
from .tensor.linalg import dot # noqa: F401
from .tensor.linalg import norm # noqa: F401
from .tensor.linalg import transpose # noqa: F401
from .tensor.linalg import transpose_ # noqa: F401
from .tensor.linalg import dist # noqa: F401
from .tensor.linalg import t # noqa: F401
from .tensor.linalg import t_ # noqa: F401
from .tensor.linalg import cdist # noqa: F401
from .tensor.linalg import cross # noqa: F401
from .tensor.linalg import cholesky # noqa: F401
@@ -381,6 +385,7 @@
from .tensor.random import multinomial # noqa: F401
from .tensor.random import standard_normal # noqa: F401
from .tensor.random import normal # noqa: F401
from .tensor.random import normal_ # noqa: F401
from .tensor.random import uniform # noqa: F401
from .tensor.random import randn # noqa: F401
from .tensor.random import rand # noqa: F401
@@ -505,6 +510,7 @@
'allclose',
'isclose',
't',
't_',
'add',
'subtract',
'diag',
@@ -556,6 +562,7 @@
'any',
'slice',
'normal',
'normal_',
'logsumexp',
'full',
'unsqueeze',
@@ -736,6 +743,9 @@
'tanh',
'tanh_',
'transpose',
'transpose_',
'cauchy_',
'geometric_',
'randn',
'strided_slice',
'unique',
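With these imports the in-place variants are reachable from the top-level namespace. An illustrative sketch (the cauchy_ and geometric_ parameter values are assumptions; their exact signatures are not shown in this diff):

    import paddle

    x = paddle.randn([2, 3])
    paddle.transpose_(x, perm=[1, 0])  # function form of the new API
    x.t_()                             # in-place matrix transpose (ndim <= 2)
    x.cauchy_()                        # fill with Cauchy samples in place
    x.geometric_(0.5)                  # fill with Geometric(p=0.5) samples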
13 changes: 13 additions & 0 deletions python/paddle/tensor/__init__.py
@@ -43,6 +43,8 @@
from .creation import empty_like # noqa: F401
from .creation import complex # noqa: F401
from .creation import polar # noqa: F401
from .creation import cauchy_ # noqa: F401
from .creation import geometric_ # noqa: F401
from .linalg import matmul # noqa: F401
from .linalg import dot # noqa: F401
from .linalg import cov # noqa: F401
@@ -51,9 +53,11 @@
from .linalg import pca_lowrank # noqa: F401
from .linalg import cond # noqa: F401
from .linalg import transpose # noqa: F401
from .linalg import transpose_ # noqa: F401
from .linalg import lstsq # noqa: F401
from .linalg import dist # noqa: F401
from .linalg import t # noqa: F401
from .linalg import t_ # noqa: F401
from .linalg import cross # noqa: F401
from .linalg import cholesky # noqa: F401
from .linalg import bmm # noqa: F401
@@ -327,6 +331,7 @@
from .random import multinomial # noqa: F401
from .random import standard_normal # noqa: F401
from .random import normal # noqa: F401
from .random import normal_ # noqa: F401
from .random import uniform # noqa: F401
from .random import uniform_ # noqa: F401
from .random import randn # noqa: F401
@@ -381,9 +386,12 @@
'norm',
'cond',
'transpose',
'cauchy_',
'geometric_',
'lstsq',
'dist',
't',
't_',
'cross',
'cholesky',
'bmm',
@@ -558,6 +566,10 @@
'stack',
'strided_slice',
'transpose',
'transpose_',
'cauchy_',
'geometric_',
'tan_',
'unique',
'unique_consecutive',
'unsqueeze',
@@ -673,6 +685,7 @@
'i1e',
'polygamma',
'polygamma_',
'normal_',
]
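Appending the new names to this list is what exposes them as Tensor methods (the list is consumed by Paddle's tensor method patching; inferred from the surrounding file rather than shown in this hunk). Method-style usage:

    import paddle

    x = paddle.empty([2, 2])
    x.cauchy_()        # bound as a method via this list
    x.geometric_(0.5)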

# this list used in math_op_patch.py for magic_method bind