
Fix l2_normalize & add nn.functional.normalize #6940

Merged
merged 16 commits on Dec 25, 2021
Changes from 13 commits
2 changes: 2 additions & 0 deletions docs/source/functional.rst
@@ -13,6 +13,7 @@ Functional operations for neural networks
.. autofunction:: hardsigmoid
.. autofunction:: hardswish
.. autofunction:: hardtanh
.. autofunction:: normalize
.. autofunction:: l2_normalize
.. autofunction:: leaky_relu
.. autofunction:: elu
@@ -22,6 +23,7 @@ Functional operations for neural networks
.. autofunction:: pad
.. autofunction:: prelu
.. autofunction:: logsigmoid
.. autofunction:: log_softmax
.. autofunction:: gelu
.. autofunction:: glu
.. autofunction:: softsign
6 changes: 5 additions & 1 deletion oneflow/core/functional/functional_api.yaml
@@ -1716,8 +1716,12 @@
signature: "TensorTuple (Tensor x, TensorTuple like, Int64 axis) => SplitLike"
bind_python: True

- name: "normalize"
signature: "Tensor (Tensor input, Float p=2.0, Int32 dim=1, Float eps=1e-12) => Normalize"
bind_python: True

- name: "l2_normalize"
signature: "TensorTuple (Tensor input, Int32 axis, Float epsilon) => L2Normalize"
signature: "Tensor (Tensor input, Int32 axis=0, Float epsilon=1e-12) => L2Normalize"
bind_python: True

- name: "l2_normalize_grad"
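For reference, a minimal sketch of how the two bindings declared above are called from Python once the flow.nn.functional exports added later in this PR are in place (values are illustrative):

import oneflow as flow

x = flow.tensor([[1.0, 2.0], [3.0, 4.0]])

# Normalize: Lp norm along `dim`, clamped from below by `eps`.
y_norm = flow.nn.functional.normalize(x, 2.0, 1, 1e-12)

# L2Normalize: L2 norm along `axis`; `epsilon` guards against division by zero.
y_l2 = flow.nn.functional.l2_normalize(x, 1, 1e-12)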
35 changes: 31 additions & 4 deletions oneflow/core/functional/impl/nn_functor.cpp
@@ -1660,18 +1660,44 @@ class L2NormalizeFunctor {
op_ = CHECK_JUST(
one::OpBuilder("l2_normalize").Input("x").Output("y").Output("square_x_sum").Build());
}
Maybe<TensorTuple> operator()(const std::shared_ptr<one::Tensor>& input, const int32_t& axis,
const float& epsilon) const {
Maybe<Tensor> operator()(const std::shared_ptr<one::Tensor>& input, const int32_t& axis,
const float& epsilon) const {
MutableAttrMap attrs;
JUST(attrs.SetAttr<int32_t>("axis", axis));
JUST(attrs.SetAttr<int32_t>("axis", 0));
JUST(attrs.SetAttr<float>("epsilon", epsilon));
return OpInterpUtil::Dispatch<TensorTuple>(*op_, {input}, attrs);

if (axis != 0) {
std::vector<int> input_perm(input->shape()->dim_vec().size(), 0);
for (size_t i = 0; i < input_perm.size(); ++i) { input_perm[i] = static_cast<int>(i); }
std::swap(input_perm[0], input_perm[static_cast<size_t>(axis)]);

const auto result = JUST(OpInterpUtil::Dispatch<TensorTuple>(
*op_, {JUST(functional::Transpose(input, input_perm))}, attrs));
return functional::Transpose(result->at(0), input_perm);
}

return OpInterpUtil::Dispatch<Tensor>(*op_, {input}, attrs);
}

private:
std::shared_ptr<OpExpr> op_;
};

class NormalizeFunctor {
public:
Maybe<Tensor> operator()(const std::shared_ptr<one::Tensor>& input, const float& p,
const int32_t& dim, const float& eps) const {
return SequenceFunction<Maybe<Tensor>(const std::shared_ptr<Tensor>&, const float&,
const int32_t&)>(
[](const auto& x, const float& p, const int32_t& dim) -> Maybe<Tensor> {
return functional::ScalarNorm(x, p, dim, true, NullOpt);
})
.then([&](const auto& x) { return functional::Clamp(x, eps, NullOpt); })
.then([&](const auto& x) { return functional::Div(input, x); })
.call(input, p, dim);
}
};

class FusedSelfAttentionFunctor {
public:
FusedSelfAttentionFunctor() {
@@ -2155,6 +2181,7 @@ ONEFLOW_FUNCTION_LIBRARY(m) {
m.add_functor<impl::OneHotFunctor>("OneHot");
m.add_functor<impl::FusedSelfAttentionFunctor>("FusedSelfAttention");
m.add_functor<impl::FusedSelfAttentionGradFunctor>("FusedSelfAttentionGrad");
m.add_functor<impl::NormalizeFunctor>("Normalize");
m.add_functor<impl::L2NormalizeFunctor>("L2Normalize");
m.add_functor<impl::L2NormalizeGradFunctor>("L2NormalizeGrad");
m.add_functor<impl::FusedBiasAddGeluFunctor>("FusedBiasAddGelu");
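To make the two functors above easier to follow, here is a rough NumPy sketch of the computation they perform. The helper names are invented for illustration, and the epsilon handling follows the docstrings added below (maximum of the norm and epsilon):

import numpy as np

def normalize_ref(x, p=2.0, dim=1, eps=1e-12):
    # NormalizeFunctor pipeline: ScalarNorm (keepdim) -> Clamp(eps, None) -> Div.
    norm = np.linalg.norm(x, ord=p, axis=dim, keepdims=True)
    return x / np.clip(norm, eps, None)

def l2_normalize_ref(x, axis=0, epsilon=1e-12):
    # L2NormalizeFunctor: the kernel reduces over axis 0 only, so the functor
    # swaps `axis` to the front, dispatches the op, and swaps the result back.
    perm = list(range(x.ndim))
    perm[0], perm[axis] = perm[axis], perm[0]
    xt = np.transpose(x, perm)
    square_x_sum = np.sum(xt * xt, axis=0, keepdims=True)
    yt = xt / np.maximum(np.sqrt(square_x_sum), epsilon)
    return np.transpose(yt, perm)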
78 changes: 78 additions & 0 deletions python/oneflow/framework/docstr/norm.py
@@ -263,3 +263,81 @@

""",
)

add_docstr(
oneflow._C.normalize,
Contributor (review comment):
Suggested change:
-oneflow._C.normalize,
+oneflow.normalize,

Contributor Author:
So does this function need to be exported under the oneflow namespace?

Contributor:
Sure, it can be exported.

Contributor Author:
I checked the PyTorch API: neither l2_normalize nor normalize is exported under torch; both live only under torch.nn.functional. Do we still need to export them?

"""nn.functional.normalize(input: Tensor, p: float=2.0, dim: int=0, epsilon: float=1e-12) -> Tensor

Performs :math:`L_p` normalization of the input over the specified dimension.

For a tensor :attr:`input` of sizes :math:`(n_0, ..., n_{dim}, ..., n_k)`, each
:math:`n_{dim}` -element vector :math:`v` along dimension :attr:`dim` is transformed as:

.. math::
v = \\frac{v}{\\max(\\lVert v \\rVert_p, \\epsilon)}.

With the default arguments it uses the Euclidean norm over vectors along dimension :math:`1` for normalization.

Note, however, that when `input.shape[dim] == 1`, the gradient of the input tensor may differ across frameworks.

Args:
input (oneflow.Tensor): input tensor of any shape
p (float): the exponent value in the norm formulation. Default: 2
dim (int): the dimension to reduce. Default: 1
eps (float): small value to avoid division by zero. Default: 1e-12

For example:

.. code-block:: python

>>> import oneflow as flow
>>> x = flow.tensor([[1, 2], [3, 4]], dtype=flow.float32)
>>> out = flow.nn.functional.normalize(x, 2, 0)
>>> out
tensor([[0.3162, 0.4472],
[0.9487, 0.8944]], dtype=oneflow.float32)
>>> out = flow.nn.functional.normalize(x, 2, 1)
>>> out
tensor([[0.4472, 0.8944],
[0.6000, 0.8000]], dtype=oneflow.float32)

""",
)

add_docstr(
oneflow._C.l2_normalize,
"""nn.functional.l2_normalize(input: Tensor, dim: int=0, epsilon: float=1e-12) -> Tensor

Uses the L2 norm to normalize the input along dimension `dim`.

The equation is:

.. math::
out = \\frac{x}{\\max(\\sqrt{\\sum{x^2}}, \\epsilon)}

Args:
input (oneflow.Tensor): Input Tensor
dim (int): The axis on which to apply L2 normalization. Defaults to 0.
epsilon (float): Small value used to avoid division by zero. Defaults to 1e-12.

Returns:
oneflow.Tensor: The normalized Tensor

For example:

.. code-block:: python

>>> import oneflow as flow
>>> x = flow.tensor([[1, 2], [3, 4]], dtype=flow.float32)
>>> out = flow.nn.functional.l2_normalize(x, 0)
>>> out
tensor([[0.3162, 0.4472],
[0.9487, 0.8944]], dtype=oneflow.float32)
>>> out = flow.nn.functional.l2_normalize(x, 1)
>>> out
tensor([[0.4472, 0.8944],
[0.6000, 0.8000]], dtype=oneflow.float32)

""",
)
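Since normalize with p=2 and l2_normalize implement the same formula, they can be cross-checked; a quick sketch, assuming the defaults above and inputs whose norm is well above epsilon:

import numpy as np
import oneflow as flow

x = flow.tensor(np.random.randn(3, 4), dtype=flow.float32)
a = flow.nn.functional.normalize(x, 2.0, 1)
b = flow.nn.functional.l2_normalize(x, 1)
print(np.allclose(a.numpy(), b.numpy(), atol=1e-6))  # expected: True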
4 changes: 3 additions & 1 deletion python/oneflow/nn/functional/__init__.py
@@ -14,7 +14,6 @@
limitations under the License.
"""
from oneflow.nn.modules.interpolate import interpolate
from oneflow.nn.modules.norm import l2_normalize
from oneflow.nn.modules.affine_grid import affine_grid
from oneflow.nn.modules.grid_sample import grid_sample
from oneflow.nn.modules.sparse_softmax_cross_entropy import sparse_softmax_cross_entropy
@@ -43,6 +42,7 @@
from oneflow._C import gelu
from oneflow._C import glu
from oneflow._C import logsigmoid
from oneflow._C import log_softmax
from oneflow._C import softsign
from oneflow._C import softmax
from oneflow._C import softplus
@@ -57,6 +57,8 @@
from oneflow._C import triplet_margin_loss
from oneflow._C import ctc_greedy_decoder
from oneflow._C import one_hot
from oneflow._C import l2_normalize
from oneflow._C import normalize
from oneflow.nn.modules.sparse import embedding
from oneflow.nn.modules.linear import linear
from oneflow.nn.modules.activation import relu6
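With these exports in place, both functions are importable directly from oneflow.nn.functional, for example:

from oneflow.nn.functional import l2_normalize, normalize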
60 changes: 0 additions & 60 deletions python/oneflow/nn/modules/norm.py

This file was deleted.

@@ -16,10 +16,9 @@

import unittest
from collections import OrderedDict

import numpy as np
from test_util import GenArgList

from oneflow.test_utils.automated_test_util import *
import numpy as np
import oneflow as flow
import oneflow.unittest

@@ -32,12 +31,18 @@ def _count(shape, begin_axis, end_axis):


def _l2_norm_numpy(x, dim, epsilon=1e-12):
axes = [k for k in range(len(list(x.shape)))]
axes[0], axes[dim] = axes[dim], axes[0]
axes_tuple = tuple(axes)

x = np.transpose(x, axes_tuple)

square_x_sum_shape = list(x.shape)
square_x_sum_shape[dim] = 1
square_x_sum_shape[0] = 1

c = x.shape[dim]
c = x.shape[0]
n = int(x.size / c)
d = _count(x.shape, dim + 1, len(x.shape))
d = _count(x.shape, 1, len(x.shape))

square_x_sum = np.zeros(square_x_sum_shape)

@@ -58,13 +63,21 @@ def _l2_norm_numpy(x, dim, epsilon=1e-12):

square_x_sum = square_x_sum_flatten.reshape(square_x_sum.shape)
out = out.reshape(x.shape)
return out, square_x_sum
return np.transpose(out, axes_tuple), np.transpose(square_x_sum, axes_tuple)


def _l2_norm_backward_np(dy, y, square_x_sum, dim, epsilon=1e-12):
c = dy.shape[dim]
axes = [k for k in range(len(list(y.shape)))]
axes[0], axes[dim] = axes[dim], axes[0]
axes_tuple = tuple(axes)

dy = np.transpose(dy, axes_tuple)
y = np.transpose(y, axes_tuple)
square_x_sum = np.transpose(square_x_sum, axes_tuple)

c = dy.shape[0]
n = int(dy.size / c)
d = _count(dy.shape, dim + 1, len(y.shape))
d = _count(dy.shape, 1, len(y.shape))

dx = np.zeros(dy.shape).reshape(-1)
dy_flatten = dy.reshape(-1)
@@ -89,7 +102,7 @@ def _l2_norm_backward_np(dy, y, square_x_sum, dim, epsilon=1e-12):
index = offset + j * d
dx[index] = (1 / norm) * dy_flatten[index]

return dx.reshape(y.shape)
return np.transpose(dx.reshape(y.shape), axes_tuple)


def _test_l2_normalize(test_case, device, dim, shape):
@@ -124,5 +137,24 @@ def test_l2_normalize(test_case):
arg[0](test_case, *arg[1:])


@flow.unittest.skip_unless_1n1d()
class TestFunctionalNormalize(flow.unittest.TestCase):
@autotest(check_graph=False)
def test_functional_normalize(test_case):
device = random_device()
ndim = random(low=2)

shape = list(random_tensor(ndim).value().shape)
dim = random(low=0, high=ndim).to(int).value()
shape[dim] = random(low=2, high=8).to(int).value()
shape = tuple(shape)

x = random_pytorch_tensor(len(shape), *shape).to(device)
m = torch.nn.functional.normalize
y = m(x, oneof(2, 3, 4), dim, 1e-12)
Contributor (review comment):
Suggested change:
-y = m(x, oneof(2, 3, 4), dim, 1e-12)
+y = torch.nn.functional.normalize(x, oneof(2, 3, 4), dim, 1e-12)

Contributor Author:
OK.

return y


if __name__ == "__main__":
unittest.main()
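The body of _test_l2_normalize is collapsed in this view; a minimal sketch of the kind of check it performs against the NumPy reference above (the helper name, tensor setup, and tolerances are assumptions, not the PR's actual code):

def _check_l2_normalize_against_numpy(test_case, device="cpu", dim=1, shape=(4, 5)):
    x = np.random.randn(*shape).astype(np.float32)
    ref_out, _ = _l2_norm_numpy(x, dim)
    y = flow.nn.functional.l2_normalize(flow.tensor(x, device=device), dim)
    test_case.assertTrue(np.allclose(y.numpy(), ref_out, rtol=1e-4, atol=1e-4))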