add inplace api transpose_, t_, normal_, cauchy_, geometric_ #57093

Merged
11 changes: 11 additions & 0 deletions paddle/phi/api/yaml/backward.yaml
@@ -939,6 +939,17 @@
  composite : gather_nd_grad(x, index, out_grad, x_grad)
  no_need_buffer : x

- backward_op : gaussian_inplace_grad
  forward : gaussian_inplace(Tensor x, float mean=0, float std=1.0, int seed=0) -> Tensor(out)
  args : (Tensor out_grad, float mean=0, float std=1.0, int seed=0)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out_grad]
  kernel :
    func : gaussian_inplace_grad
  inplace : (out_grad -> x_grad)

- backward_op : gelu_grad
  forward : gelu(Tensor x, bool approximate) -> Tensor(out)
  args : (Tensor x, Tensor out_grad, bool approximate)
1 change: 1 addition & 0 deletions paddle/phi/api/yaml/generator/api_base.py
@@ -1223,6 +1223,7 @@ def gen_kernel_code(self, kernel_name, code_indent, inplace_flag=False):
"unsqueeze",
"reshape",
"flatten",
"transpose",
]:
i = 0
for kernel_out in outputs_args:
1 change: 1 addition & 0 deletions paddle/phi/api/yaml/legacy_ops.yaml
@@ -1065,6 +1065,7 @@
    func : TransposeInferMeta
  kernel :
    func : transpose
  inplace : (x -> out)
  backward : transpose_grad

- op : tril
13 changes: 13 additions & 0 deletions paddle/phi/api/yaml/ops.yaml
@@ -1052,6 +1052,19 @@
    func : gather_tree
    data_type : ids

- op : gaussian_inplace
Contributor: Why does this need a separate in-place operator?

Contributor Author: The existing gaussian operator has no input tensor, so lowering this as gaussian + assign would compute the gradient incorrectly.

Contributor Author: The existing uniform_inplace was likewise added as a dedicated operator. Since uniform and gaussian can be combined to produce all the other distributions, no further operators of this kind should be needed.

  args : (Tensor x, float mean=0, float std=1.0, int seed=0)
  output : Tensor(out)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : gaussian_inplace
    data_type : x
    backend : x
  inplace : (x -> out)
  backward : gaussian_inplace_grad

- op : gelu
  args : (Tensor x, bool approximate = false)
  output : Tensor(out)
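
A hedged usage sketch of the Python API this op backs (normal_ is exported later in this PR; the exact method binding and defaults are assumed, not taken from its docs):

import paddle

x = paddle.randn([2, 3])

# Refill x with samples from a normal distribution (mean 1.0, std 2.0),
# reusing x's buffer per `inplace : (x -> out)` above. Because the op takes
# x as an input, autograd can attach gaussian_inplace_grad, which defines
# x_grad as all zeros; a gaussian() + assign() lowering could not express
# this, since the plain gaussian op has no input tensor to receive a grad.
x.normal_(mean=1.0, std=2.0)
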
41 changes: 41 additions & 0 deletions paddle/phi/kernels/cpu/gaussian_inplace_grad_kernel.cc
@@ -0,0 +1,41 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/kernels/gaussian_inplace_grad_kernel.h"

#include "paddle/phi/core/kernel_registry.h"

namespace phi {

template <typename T, typename Context>
void GaussianInplaceGradKernel(const Context& ctx,
                               const DenseTensor& out_grad UNUSED,
                               float mean UNUSED,
                               float std UNUSED,
                               int seed UNUSED,
                               DenseTensor* x_grad) {
  if (x_grad) {
    auto* data = ctx.template Alloc<T>(x_grad);
    std::fill(data, data + x_grad->numel(), T(0));
  }
}

} // namespace phi

PD_REGISTER_KERNEL(gaussian_inplace_grad,
                   CPU,
                   ALL_LAYOUT,
                   phi::GaussianInplaceGradKernel,
                   float,
                   double) {}
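
In NumPy terms, the rule this kernel implements is just "zeros shaped like the incoming gradient"; a sketch with an illustrative helper name:

import numpy as np

def gaussian_inplace_grad_ref(out_grad: np.ndarray) -> np.ndarray:
    # The forward op overwrites its input with fresh random samples, so the
    # output is independent of x and d(out)/d(x) is identically zero.
    # mean/std/seed are carried in the signature but unused, mirroring the
    # UNUSED annotations above.
    return np.zeros_like(out_grad)
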
31 changes: 31 additions & 0 deletions paddle/phi/kernels/cpu/gaussian_kernel.cc
@@ -48,7 +48,38 @@ void GaussianKernel(const Context& dev_ctx,
  }
}

template <typename T, typename Context>
void GaussianInplaceKernel(const Context& dev_ctx,
                           const DenseTensor& x,
                           float mean,
                           float std,
Comment on lines +54 to +55

jeff41404 (Contributor), Sep 19, 2023: If the dtype of x is double, an implicit type conversion will be performed on mean and std, resulting in a loss of accuracy.

Contributor Author: As above.
                           int seed,
                           DenseTensor* out) {
  T* data = dev_ctx.template Alloc<T>(out);
  std::normal_distribution<T> dist(mean, std);

  int64_t size = out->numel();
  std::shared_ptr<std::mt19937_64> engine;
  if (seed) {
    engine = std::make_shared<std::mt19937_64>();
    engine->seed(seed);
  } else {
    engine = dev_ctx.GetGenerator()->GetCPUEngine();
  }

  for (int64_t i = 0; i < size; ++i) {
    data[i] = dist(*engine);
  }
}

} // namespace phi

PD_REGISTER_KERNEL(
    gaussian, CPU, ALL_LAYOUT, phi::GaussianKernel, float, double) {}

PD_REGISTER_KERNEL(gaussian_inplace,
                   CPU,
                   ALL_LAYOUT,
                   phi::GaussianInplaceKernel,
                   float,
                   double) {}
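
The seeding policy of the CPU kernel, restated as a plain-Python sketch (names are illustrative; random.Random stands in for std::mt19937_64 and for the device context's global engine):

import random

_global_engine = random.Random()  # stands in for GetGenerator()->GetCPUEngine()

def gaussian_inplace_ref(n, mean=0.0, std=1.0, seed=0):
    # A non-zero seed gets a fresh, deterministic engine; seed == 0 draws
    # from the shared global engine, matching the branch in the kernel.
    engine = random.Random(seed) if seed else _global_engine
    return [engine.gauss(mean, std) for _ in range(n)]
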
29 changes: 29 additions & 0 deletions paddle/phi/kernels/gaussian_inplace_grad_kernel.h
@@ -0,0 +1,29 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/phi/core/dense_tensor.h"

namespace phi {

template <typename T, typename Context>
void GaussianInplaceGradKernel(const Context& ctx,
                               const DenseTensor& out_grad,
                               float mean,
                               float std,
                               int seed,
                               DenseTensor* x_grad);

} // namespace phi
8 changes: 8 additions & 0 deletions paddle/phi/kernels/gaussian_kernel.h
@@ -29,4 +29,12 @@ void GaussianKernel(const Context& ctx,
                    DataType dtype,
                    DenseTensor* out);

template <typename T, typename Context>
void GaussianInplaceKernel(const Context& ctx,
                           const DenseTensor& x,
                           float mean,
                           float std,
                           int seed,
                           DenseTensor* out);

} // namespace phi
44 changes: 44 additions & 0 deletions paddle/phi/kernels/gpu/gaussian_inplace_grad_kernel.cu
@@ -0,0 +1,44 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/kernels/gaussian_inplace_grad_kernel.h"

#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/full_kernel.h"

namespace phi {

template <typename T, typename Context>
void GaussianInplaceGradKernel(const Context& ctx,
                               const DenseTensor& out_grad,
                               float mean,
                               float std,
                               int seed,
                               DenseTensor* x_grad) {
  auto dims = vectorize(x_grad->dims());
  float value = 0.0f;
  phi::FullKernel<T>(ctx, dims, value, phi::DataType::UNDEFINED, x_grad);
}

} // namespace phi

PD_REGISTER_KERNEL(gaussian_inplace_grad,
                   GPU,
                   ALL_LAYOUT,
                   phi::GaussianInplaceGradKernel,
                   float,
                   double,
                   phi::dtype::float16,
                   phi::dtype::bfloat16) {}
32 changes: 32 additions & 0 deletions paddle/phi/kernels/gpu/gaussian_kernel.cu
@@ -76,6 +76,29 @@ void GaussianKernel(const Context& dev_ctx,
  }
}

template <typename T, typename Context>
void GaussianInplaceKernel(const Context& dev_ctx,
                           const DenseTensor& x,
                           float mean,
                           float std,
                           int seed,
                           DenseTensor* out) {
  dev_ctx.template Alloc<T>(out);
  if (seed == 0) {
    // use global Generator seed
    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
    funcs::normal_distribution<MT> dist;
    funcs::normal_transform<MT> trans(static_cast<MT>(mean),
                                      static_cast<MT>(std));
    funcs::distribution_and_transform<T>(dev_ctx, out, dist, trans);
  } else {
    // use OP seed
    auto func =
        GaussianGenerator<T>(static_cast<T>(mean), static_cast<T>(std), seed);
    IndexKernel<T, GaussianGenerator<T>>(dev_ctx, out, func);
  }
}

} // namespace phi

PD_REGISTER_KERNEL(gaussian,
@@ -86,3 +109,12 @@ PD_REGISTER_KERNEL(gaussian,
                   phi::dtype::bfloat16,
                   float,
                   double) {}

PD_REGISTER_KERNEL(gaussian_inplace,
                   GPU,
                   ALL_LAYOUT,
                   phi::GaussianInplaceKernel,
                   phi::dtype::float16,
                   phi::dtype::bfloat16,
                   float,
                   double) {}
2 changes: 0 additions & 2 deletions paddle/phi/kernels/stride/transpose_kernel.cc
@@ -33,11 +33,9 @@ void TransposeStridedKernel(const Context& ctx,

   auto meta = out->meta();
   auto in_stride = x.strides();
-  auto in_dims = x.dims();
   meta.strides = in_stride;
   for (int i = 0; i < static_cast<int>(formated_axis.size()); i++) {
     meta.strides[i] = in_stride[formated_axis[i]];
-    meta.dims[i] = in_dims[formated_axis[i]];
   }
   meta.offset = x.offset();

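What the surviving loop computes, as a small self-checking sketch: the output aliases the input's storage, its stride along output dim i is the input's stride along axis[i], and its dims are already the permuted shape (set by TransposeInferMeta), which is why the deleted meta.dims assignment was redundant.

def transpose_strided_meta(in_strides, in_offset, axis):
    # Permute strides only; dims come from infer_meta, storage is shared.
    return [in_strides[a] for a in axis], in_offset

# A 2x3 row-major tensor has strides [3, 1]; transposing with axis=[1, 0]
# views the same storage with strides [1, 3].
assert transpose_strided_meta([3, 1], 0, [1, 0]) == ([1, 3], 0)
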
10 changes: 10 additions & 0 deletions python/paddle/__init__.py
@@ -122,12 +122,16 @@
from .tensor.creation import tril_indices # noqa: F401
from .tensor.creation import triu_indices # noqa: F401
from .tensor.creation import polar # noqa: F401
from .tensor.creation import geometric_ # noqa: F401
from .tensor.creation import cauchy_ # noqa: F401
from .tensor.linalg import matmul # noqa: F401
from .tensor.linalg import dot # noqa: F401
from .tensor.linalg import norm # noqa: F401
from .tensor.linalg import transpose # noqa: F401
from .tensor.linalg import transpose_ # noqa: F401
from .tensor.linalg import dist # noqa: F401
from .tensor.linalg import t # noqa: F401
from .tensor.linalg import t_ # noqa: F401
from .tensor.linalg import cdist # noqa: F401
from .tensor.linalg import cross # noqa: F401
from .tensor.linalg import cholesky # noqa: F401
@@ -381,6 +385,7 @@
from .tensor.random import multinomial # noqa: F401
from .tensor.random import standard_normal # noqa: F401
from .tensor.random import normal # noqa: F401
from .tensor.random import normal_ # noqa: F401
from .tensor.random import uniform # noqa: F401
from .tensor.random import randn # noqa: F401
from .tensor.random import rand # noqa: F401
@@ -505,6 +510,7 @@
    'allclose',
    'isclose',
    't',
    't_',
    'add',
    'subtract',
    'diag',
@@ -556,6 +562,7 @@
    'any',
    'slice',
    'normal',
    'normal_',
    'logsumexp',
    'full',
    'unsqueeze',
@@ -736,6 +743,9 @@
    'tanh',
    'tanh_',
    'transpose',
    'transpose_',
    'cauchy_',
    'geometric_',
    'randn',
    'strided_slice',
    'unique',
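
Taken together, the exports above surface the five new in-place methods named in the PR title. A hedged tour (parameter names assumed from the corresponding out-of-place APIs; each call is expected to mutate x rather than allocate a new tensor):

import paddle

x = paddle.randn([2, 3])

x.transpose_(perm=[1, 0])       # shape becomes [3, 2], same storage
x.t_()                          # matrix transpose in place, back to [2, 3]
x.normal_(mean=0.0, std=1.0)    # refill with normal samples
x.cauchy_(loc=0.0, scale=1.0)   # refill with Cauchy samples
x.geometric_(probs=0.5)         # refill with geometric samples
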
13 changes: 13 additions & 0 deletions python/paddle/tensor/__init__.py
@@ -43,6 +43,8 @@
from .creation import empty_like # noqa: F401
from .creation import complex # noqa: F401
from .creation import polar # noqa: F401
from .creation import cauchy_ # noqa: F401
from .creation import geometric_ # noqa: F401
from .linalg import matmul # noqa: F401
from .linalg import dot # noqa: F401
from .linalg import cov # noqa: F401
@@ -51,9 +53,11 @@
from .linalg import pca_lowrank # noqa: F401
from .linalg import cond # noqa: F401
from .linalg import transpose # noqa: F401
from .linalg import transpose_ # noqa: F401
from .linalg import lstsq # noqa: F401
from .linalg import dist # noqa: F401
from .linalg import t # noqa: F401
from .linalg import t_ # noqa: F401
from .linalg import cross # noqa: F401
from .linalg import cholesky # noqa: F401
from .linalg import bmm # noqa: F401
@@ -327,6 +331,7 @@
from .random import multinomial # noqa: F401
from .random import standard_normal # noqa: F401
from .random import normal # noqa: F401
from .random import normal_ # noqa: F401
from .random import uniform # noqa: F401
from .random import uniform_ # noqa: F401
from .random import randn # noqa: F401
@@ -381,9 +386,12 @@
    'norm',
    'cond',
    'transpose',
    'cauchy_',
    'geometric_',
    'lstsq',
    'dist',
    't',
    't_',
    'cross',
    'cholesky',
    'bmm',
@@ -558,6 +566,10 @@
    'stack',
    'strided_slice',
    'transpose',
    'transpose_',
    'cauchy_',
    'geometric_',
    'tan_',
    'unique',
    'unique_consecutive',
    'unsqueeze',
@@ -673,6 +685,7 @@
    'i1e',
    'polygamma',
    'polygamma_',
    'normal_',
]

# this list used in math_op_patch.py for magic_method bind