Use normalize instead of l2_normalize #7113

Merged: 19 commits, Jan 17, 2022
Changes from 15 commits
1 change: 0 additions & 1 deletion docs/source/functional.rst
@@ -14,7 +14,6 @@ Functional operations for neural networks
 .. autofunction:: hardswish
 .. autofunction:: hardtanh
 .. autofunction:: normalize
-.. autofunction:: l2_normalize
 .. autofunction:: leaky_relu
 .. autofunction:: elu
 .. autofunction:: celu
4 changes: 2 additions & 2 deletions oneflow/core/functional/functional_api.yaml
@@ -1762,12 +1762,12 @@
   bind_python: True

 - name: "normalize"
-  signature: "Tensor (Tensor input, Float p=2.0, Int32 dim=1, Float eps=1e-12) => Normalize"
+  signature: "Tensor (Tensor input, Float p=2.0, Int32 dim=1, Float eps=1e-12, Bool use_l2_norm_kernel=True) => Normalize"
   bind_python: True

 - name: "l2_normalize"
   signature: "Tensor (Tensor input, Int32 axis=0, Float epsilon=1e-12) => L2Normalize"
-  bind_python: True
+  bind_python: False

 - name: "l2_normalize_grad"
   signature: "Tensor (Tensor dy, Tensor y, Tensor square_x_sum, Int32 axis, Float epsilon) => L2NormalizeGrad"
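For reference, a minimal usage sketch of what this binding change implies for callers (illustrative only, not part of the diff): since `l2_normalize` is no longer bound to Python and `use_l2_norm_kernel` defaults to `True`, user code goes through `normalize`, whose `p=2.0` calls are routed to the fused L2 kernel by the functor change below.

```python
import oneflow as flow

x = flow.tensor([[1.0, 2.0], [3.0, 4.0]])

# Before this PR: flow.nn.functional.l2_normalize(x, 0)
# After this PR: normalize() is the public entry point; with the default
# use_l2_norm_kernel=True, p=2.0 dispatches to the same fused l2_normalize kernel.
y = flow.nn.functional.normalize(x, p=2.0, dim=0)
```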
32 changes: 21 additions & 11 deletions oneflow/core/functional/impl/nn_functor.cpp
@@ -1663,21 +1663,27 @@ class L2NormalizeFunctor
   }
   Maybe<Tensor> operator()(const std::shared_ptr<one::Tensor>& input, const int32_t& axis,
                            const float& epsilon) const {
+    const auto ndims = input->shape()->NumAxes();
+    const auto final_dim = ndims - 1;
+
+    auto axis_ = axis >= 0 ? axis : axis + ndims;
+    CHECK_GE_OR_RETURN(axis_, 0) << "Axis should >=0 but axis is " << axis_ << " now.";
+    CHECK_LE_OR_RETURN(axis_, final_dim)
+        << "Axis should <" << ndims << " but axis is " << axis_ << " now.";
+
     MutableAttrMap attrs;
-    JUST(attrs.SetAttr<int32_t>("axis", 0));
     JUST(attrs.SetAttr<float>("epsilon", epsilon));
+    JUST(attrs.SetAttr<int32_t>("axis", final_dim));

-    if (axis != 0) {
-      std::vector<int> input_perm(input->shape()->dim_vec().size(), 0);
-      for (size_t i = 0; i < input_perm.size(); ++i) { input_perm[i] = static_cast<int>(i); }
-      std::swap(input_perm[0], input_perm[static_cast<size_t>(axis)]);
+    if (axis_ == final_dim) { return OpInterpUtil::Dispatch<Tensor>(*op_, {input}, attrs); }

-      const auto result = JUST(OpInterpUtil::Dispatch<TensorTuple>(
-          *op_, {JUST(functional::Transpose(input, input_perm))}, attrs));
-      return functional::Transpose(result->at(0), input_perm);
-    }
+    std::vector<int> input_perm(input->shape()->dim_vec().size(), 0);
+    for (size_t i = 0; i < input_perm.size(); ++i) { input_perm[i] = static_cast<int>(i); }
+    std::swap(input_perm[final_dim], input_perm[static_cast<size_t>(axis_)]);

-    return OpInterpUtil::Dispatch<Tensor>(*op_, {input}, attrs);
+    const auto result = JUST(OpInterpUtil::Dispatch<TensorTuple>(
+        *op_, {JUST(functional::Transpose(input, input_perm))}, attrs));
+    return functional::Transpose(result->at(0), input_perm);
   }

  private:
@@ -1687,7 +1693,11 @@ class L2NormalizeFunctor
 class NormalizeFunctor {
  public:
   Maybe<Tensor> operator()(const std::shared_ptr<one::Tensor>& input, const float& p,
-                           const int32_t& dim, const float& eps) const {
+                           const int32_t& dim, const float& eps,
+                           const bool& use_l2_norm_kernel) const {
+    if (use_l2_norm_kernel && (std::fabs(p - 2.0f) < std::numeric_limits<float>::min())) {
+      return functional::L2Normalize(input, dim, eps);
+    }
     return SequenceFunction<Maybe<Tensor>(const std::shared_ptr<Tensor>&, const float&,
                                           const int32_t&)>(
                [](const auto& x, const float& p, const int32_t& dim) -> Maybe<Tensor> {
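A NumPy-only sanity check of the dispatch added above, as a sketch: it assumes the generic path computes the usual `x / max(||x||_2, eps)`, which coincides with the fused kernel's `x / sqrt(max(sum(x^2), eps))` whenever the squared norm is at least `eps`.

```python
import numpy as np

eps = 1e-12
x = np.random.randn(4, 5).astype(np.float32)

# Fused l2_normalize-style formula: x / sqrt(max(sum(x^2), eps)), reduced over dim=1.
fused = x / np.sqrt(np.maximum((x ** 2).sum(axis=1, keepdims=True), eps))
# Generic p-norm formula with p=2: x / max(||x||_2, eps).
generic = x / np.maximum(np.linalg.norm(x, ord=2, axis=1, keepdims=True), eps)

assert np.allclose(fused, generic, atol=1e-6)
```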
37 changes: 0 additions & 37 deletions python/oneflow/framework/docstr/norm.py
@@ -304,40 +304,3 @@

     """,
 )
-
-add_docstr(
-    oneflow._C.l2_normalize,
-    """nn.functional.l2_normalize(input: Tensor, dim: int=0, epsilon: float=1e-12) -> Tensor
-
-    Use L2 norm to normalizes along dimension `dim`
-
-    The equation is:
-
-    .. math::
-        out = \\frac{x}{max(\\sqrt{\\Sigma{x^2}}, \\epsilon)}
-
-    Args:
-        input (oneflow.Tensor): Input Tensor
-        dim (int): The axis on which to apply L2 normalization. Defaults to 0.
-        epsilon (float): The epsilon value is used to avoid division by zero. Defaults to 1e-12.
-
-    Returns:
-        oneflow.Tensor: The normalized Tensor
-
-    For example:
-
-    .. code-block:: python
-
-        >>> import oneflow as flow
-        >>> x = flow.tensor([[1, 2], [3, 4]], dtype=flow.float32)
-        >>> out = flow.nn.functional.l2_normalize(x, 0)
-        >>> out
-        tensor([[0.3162, 0.4472],
-                [0.9487, 0.8944]], dtype=oneflow.float32)
-        >>> out = flow.nn.functional.l2_normalize(x, 1)
-        >>> out
-        tensor([[0.4472, 0.8944],
-                [0.6000, 0.8000]], dtype=oneflow.float32)
-
-    """,
-)
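The removed doctest carries over to `normalize` with only the call site changed; a sketch of the equivalent example, with the expected values copied from the deleted `l2_normalize` docstring above and assuming the `p=2.0` fast path reproduces them exactly:

```python
>>> import oneflow as flow
>>> x = flow.tensor([[1, 2], [3, 4]], dtype=flow.float32)
>>> flow.nn.functional.normalize(x, p=2.0, dim=0)
tensor([[0.3162, 0.4472],
        [0.9487, 0.8944]], dtype=oneflow.float32)
>>> flow.nn.functional.normalize(x, p=2.0, dim=1)
tensor([[0.4472, 0.8944],
        [0.6000, 0.8000]], dtype=oneflow.float32)
```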
1 change: 0 additions & 1 deletion python/oneflow/nn/functional/__init__.py
@@ -57,7 +57,6 @@
 from oneflow._C import triplet_margin_loss
 from oneflow._C import ctc_greedy_decoder
 from oneflow._C import one_hot
-from oneflow._C import l2_normalize
 from oneflow._C import normalize
 from oneflow.nn.modules.sparse import embedding
 from oneflow.nn.modules.linear import linear
119 changes: 1 addition & 118 deletions python/oneflow/test/modules/test_normalize.py
@@ -15,131 +15,14 @@
 """

 import unittest
-from collections import OrderedDict
-from test_util import GenArgList
 from oneflow.test_utils.automated_test_util import *
 import numpy as np
 import oneflow as flow
 import oneflow.unittest

-
-def _count(shape, begin_axis, end_axis):
-    cnt = 1
-    for i in range(begin_axis, end_axis):
-        cnt *= shape[i]
-    return cnt
-
-
-def _l2_norm_numpy(x, dim, epsilon=1e-12):
-    axes = [k for k in range(len(list(x.shape)))]
-    axes[0], axes[dim] = axes[dim], axes[0]
-    axes_tuple = tuple(axes)
-
-    x = np.transpose(x, axes_tuple)
-
-    square_x_sum_shape = list(x.shape)
-    square_x_sum_shape[0] = 1
-
-    c = x.shape[0]
-    n = int(x.size / c)
-    d = _count(x.shape, 1, len(x.shape))
-
-    square_x_sum = np.zeros(square_x_sum_shape)
-
-    square_x_sum_flatten = square_x_sum.reshape(-1)
-    in_flatten = x.reshape(-1)
-    out = np.zeros(x.size)
-
-    for i in range(0, n):
-        offset = int(int((i / d)) * d * c + (i % d))
-        for j in range(0, c):
-            item = in_flatten[offset + j * d]
-            square_x_sum_flatten[i] = square_x_sum_flatten[i] + item * item
-
-        norm = np.sqrt(np.maximum(square_x_sum_flatten[i], epsilon))
-        for j in range(0, c):
-            index = offset + j * d
-            out[index] = in_flatten[index] / norm
-
-    square_x_sum = square_x_sum_flatten.reshape(square_x_sum.shape)
-    out = out.reshape(x.shape)
-    return np.transpose(out, axes_tuple), np.transpose(square_x_sum, axes_tuple)
-
-
-def _l2_norm_backward_np(dy, y, square_x_sum, dim, epsilon=1e-12):
-    axes = [k for k in range(len(list(y.shape)))]
-    axes[0], axes[dim] = axes[dim], axes[0]
-    axes_tuple = tuple(axes)
-
-    dy = np.transpose(dy, axes_tuple)
-    y = np.transpose(y, axes_tuple)
-    square_x_sum = np.transpose(square_x_sum, axes_tuple)
-
-    c = dy.shape[0]
-    n = int(dy.size / c)
-    d = _count(dy.shape, 1, len(y.shape))
-
-    dx = np.zeros(dy.shape).reshape(-1)
-    dy_flatten = dy.reshape(-1)
-    y_flatten = y.reshape(-1)
-    square_x_sum_flatten = square_x_sum.reshape(-1)
-
-    for i in range(0, n):
-        norm = np.sqrt(np.maximum(square_x_sum_flatten[i], epsilon))
-        offset = int(int(int((i / d)) * d * c) + (i % d))
-        if square_x_sum_flatten[i] >= epsilon:
-            y_dy_inner_prod = 0
-            for j in range(0, c):
-                index = offset + j * d
-                y_dy_inner_prod = y_dy_inner_prod + dy_flatten[index] * y_flatten[index]
-            for j in range(0, c):
-                index = offset + j * d
-                dx[index] = (1 / norm) * (
-                    dy_flatten[index] - y_dy_inner_prod * y_flatten[index]
-                )
-        else:
-            for j in range(0, c):
-                index = offset + j * d
-                dx[index] = (1 / norm) * dy_flatten[index]
-
-    return np.transpose(dx.reshape(y.shape), axes_tuple)
-
-
-def _test_l2_normalize(test_case, device, dim, shape):
-    input = np.random.randn(*shape)
-    np_out, square_x_sum = _l2_norm_numpy(input, dim)
-    of_input = flow.tensor(
-        input, dtype=flow.float32, requires_grad=True, device=flow.device(device)
-    )
-    of_out = flow.nn.functional.l2_normalize(of_input, dim)
-
-    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))
-
-    z = of_out.sum()
-    z.backward()
-    dx = _l2_norm_backward_np(np.ones(np_out.shape), np_out, square_x_sum, dim)
-    test_case.assertTrue(np.allclose(of_input.grad.numpy(), dx, 1e-4, 1e-4))
-
-
-@flow.unittest.skip_unless_1n1d()
-class TestL2Normalize(flow.unittest.TestCase):
-    def test_l2_normalize(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["test_fun"] = [
-            _test_l2_normalize,
-        ]
-        arg_dict["device"] = ["cpu", "cuda"]
-        arg_dict["dim"] = [0, 1, 2, 3]
-        arg_dict["shape"] = [
-            (10, 10, 20, 30),
-        ]
-        for arg in GenArgList(arg_dict):
-            arg[0](test_case, *arg[1:])
-
-
 @flow.unittest.skip_unless_1n1d()
 class TestFunctionalNormalize(flow.unittest.TestCase):
-    @autotest(check_graph=False)
+    @autotest()
     def test_functional_normalize(test_case):
         device = random_device()
         ndim = random(low=2)
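Not part of this PR, but if an explicit forward check of the `p=2` path is ever wanted again, the deleted element-wise NumPy reference can be reproduced in a few vectorized lines; `_l2_normalize_np` below is a hypothetical helper, not something the test suite defines:

```python
import numpy as np
import oneflow as flow


def _l2_normalize_np(x, dim, epsilon=1e-12):
    # Same formula as the removed reference: out = x / sqrt(max(sum(x^2, dim), eps)).
    square_x_sum = np.square(x).sum(axis=dim, keepdims=True)
    return x / np.sqrt(np.maximum(square_x_sum, epsilon))


x = np.random.randn(10, 10, 20, 30).astype(np.float32)
for dim in range(x.ndim):
    of_out = flow.nn.functional.normalize(flow.tensor(x), p=2.0, dim=dim)
    assert np.allclose(of_out.numpy(), _l2_normalize_np(x, dim), atol=1e-4)
```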