[numpy] Fix d2l performance regression (apache#15173)
* Add np array adapter decorator for layers

* Fix performance regression caused by too many conversions between nd.NDArray and np.ndarray

* Fix pylint

* Fix test backward compatibility issue

* Fix test_lambda
reminisce authored and haojin2 committed Jul 28, 2019
1 parent b38e321 commit 70e3163
Showing 20 changed files with 174 additions and 127 deletions.
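In brief: earlier revisions of the numpy branch converted between `np.ndarray` and `nd.NDArray` around individual operator calls inside `hybrid_forward`, which is what caused the d2l slowdown. This commit centralizes the conversion in a single decorator applied to `hybrid_forward`. Below is a minimal sketch of that pattern, not the actual implementation (the real decorator is `_adapt_np_array` in `python/mxnet/gluon/utils.py`, shown in the diff further down); it assumes only names that appear in this diff (`is_np_array`, `as_nd_ndarray`, `as_np_ndarray`).

```python
# Minimal sketch of the adapter pattern introduced by this commit (simplified).
import functools
from mxnet.util import is_np_array

def adapt_np_array(func):
    """Convert np.ndarray inputs to classic NDArrays once on entry and convert
    the output back once on exit, so the wrapped body can use legacy ops only."""
    @functools.wraps(func)
    def wrapper(self, F, *tensors, **kwargs):
        if not is_np_array():
            return func(self, F, *tensors, **kwargs)
        nd_inputs = [t if t is None else t.as_nd_ndarray() for t in tensors]
        out = func(self, F, *nd_inputs, **kwargs)
        return out.as_np_ndarray()  # assumes a single output, for brevity
    return wrapper
```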
8 changes: 3 additions & 5 deletions python/mxnet/gluon/data/vision/transforms.py
@@ -23,7 +23,7 @@
from ...nn import Sequential, HybridSequential
from .... import image
from ....base import numeric_types
from ....util import is_np_array
from ...utils import _adapt_np_array


class Compose(Sequential):
@@ -134,11 +134,9 @@ class ToTensor(HybridBlock):
def __init__(self):
super(ToTensor, self).__init__()

@_adapt_np_array
def hybrid_forward(self, F, x):
if is_np_array():
x = x.as_classic_ndarray()
out = F.image.to_tensor(x)
return out.as_np_ndarray() if is_np_array() else out
return F.image.to_tensor(x)


class Normalize(HybridBlock):
50 changes: 25 additions & 25 deletions python/mxnet/gluon/loss.py
@@ -29,7 +29,8 @@
from .. import ndarray
from ..base import numeric_types
from .block import HybridBlock
from .utils import _to_classic_arrays, _to_np_arrays
from .utils import _adapt_np_array
from ..util import is_np_array


def _apply_weighting(F, loss, weight=None, sample_weight=None):
@@ -54,7 +55,10 @@ def _apply_weighting(F, loss, weight=None, sample_weight=None):
Weighted loss
"""
if sample_weight is not None:
loss = F.broadcast_mul(loss, sample_weight)
if is_np_array():
loss = loss * sample_weight
else:
loss = F.broadcast_mul(loss, sample_weight)

if weight is not None:
assert isinstance(weight, numeric_types), "weight must be a number"
@@ -65,7 +69,11 @@

def _reshape_like(F, x, y):
"""Reshapes x to the same shape as y."""
return x.reshape(y.shape) if F is ndarray else F.reshape_like(x, y)
if F is ndarray:
return x.reshape(y.shape)
elif is_np_array():
F = F.npx
return F.reshape_like(x, y)


class Loss(HybridBlock):
@@ -136,14 +144,16 @@ def __init__(self, weight=1., batch_axis=0, **kwargs):
super(L2Loss, self).__init__(weight, batch_axis, **kwargs)

def hybrid_forward(self, F, pred, label, sample_weight=None):
# TODO(junwu): This is a temp solution to reuse legacy ops for np.ndarray.
# We should rewrite this with np/npx ops.
pred, label, sample_weight = _to_classic_arrays(pred, label, sample_weight)
label = _reshape_like(F, label, pred)
loss = F.square(label - pred)
loss = F.np.square(label - pred) if is_np_array() else F.square(label - pred)
loss = _apply_weighting(F, loss, self._weight / 2, sample_weight)
out = F.mean(loss, axis=self._batch_axis, exclude=True)
return _to_np_arrays(out)
if is_np_array():
if F is ndarray:
return F.np.mean(loss, axis=tuple(range(1, loss.ndim)))
else:
return F.npx.batch_flatten(loss).mean(axis=1)
else:
return F.mean(loss, axis=self._batch_axis, exclude=True)


class L1Loss(Loss):
@@ -178,15 +188,12 @@ class L1Loss(Loss):
def __init__(self, weight=None, batch_axis=0, **kwargs):
super(L1Loss, self).__init__(weight, batch_axis, **kwargs)

@_adapt_np_array
def hybrid_forward(self, F, pred, label, sample_weight=None):
# TODO(junwu): This is a temp solution to reuse legacy ops for np.ndarray.
# We should rewrite this with np/npx ops.
pred, label, sample_weight = _to_classic_arrays(pred, label, sample_weight)
label = _reshape_like(F, label, pred)
loss = F.abs(label - pred)
loss = _apply_weighting(F, loss, self._weight, sample_weight)
out = F.mean(loss, axis=self._batch_axis, exclude=True)
return _to_np_arrays(out)
return F.mean(loss, axis=self._batch_axis, exclude=True)


class SigmoidBinaryCrossEntropyLoss(Loss):
@@ -251,11 +258,8 @@ def __init__(self, from_sigmoid=False, weight=None, batch_axis=0, **kwargs):
weight, batch_axis, **kwargs)
self._from_sigmoid = from_sigmoid

@_adapt_np_array
def hybrid_forward(self, F, pred, label, sample_weight=None, pos_weight=None):
# TODO(junwu): This is a temp solution to reuse legacy ops for np.ndarray.
# We should rewrite this with np/npx ops.
pred, label, sample_weight, pos_weight =\
_to_classic_arrays(pred, label, sample_weight, pos_weight)
label = _reshape_like(F, label, pred)
if not self._from_sigmoid:
if pos_weight is None:
@@ -277,8 +281,7 @@ def hybrid_forward(self, F, pred, label, sample_weight=None, pos_weight=None):
loss = -(F.broadcast_mul(F.log(pred + eps) * label, pos_weight)
+ F.log(1. - pred + eps) * (1. - label))
loss = _apply_weighting(F, loss, self._weight, sample_weight)
out = F.mean(loss, axis=self._batch_axis, exclude=True)
return _to_np_arrays(out)
return F.mean(loss, axis=self._batch_axis, exclude=True)


SigmoidBCELoss = SigmoidBinaryCrossEntropyLoss
@@ -354,10 +357,8 @@ def __init__(self, axis=-1, sparse_label=True, from_logits=False, weight=None,
self._sparse_label = sparse_label
self._from_logits = from_logits

@_adapt_np_array
def hybrid_forward(self, F, pred, label, sample_weight=None):
# TODO(junwu): This is a temp solution to reuse legacy ops for np.ndarray.
# We should rewrite this with np/npx ops.
pred, label = _to_classic_arrays(pred, label)
if not self._from_logits:
pred = F.log_softmax(pred, self._axis)
if self._sparse_label:
@@ -366,8 +367,7 @@ def hybrid_forward(self, F, pred, label, sample_weight=None):
label = _reshape_like(F, label, pred)
loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
loss = _apply_weighting(F, loss, self._weight, sample_weight)
out = F.mean(loss, axis=self._batch_axis, exclude=True)
return _to_np_arrays(out)
return F.mean(loss, axis=self._batch_axis, exclude=True)


SoftmaxCELoss = SoftmaxCrossEntropyLoss
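The numpy path in the losses above reduces over all non-batch axes explicitly (`F.np.mean(loss, axis=tuple(range(1, loss.ndim)))` in imperative mode, `F.npx.batch_flatten(loss).mean(axis=1)` when hybridized), which matches the legacy `F.mean(loss, axis=self._batch_axis, exclude=True)` reduction for a batch axis of 0. A quick check of the axis arithmetic with plain NumPy (illustration only, not MXNet code):

```python
import numpy as np

loss = np.arange(24, dtype=np.float32).reshape(2, 3, 4)   # (batch, ...) loss values
per_sample = loss.mean(axis=tuple(range(1, loss.ndim)))   # mean over all non-batch axes
flattened = loss.reshape(loss.shape[0], -1).mean(axis=1)  # batch_flatten-then-mean path
assert per_sample.shape == (2,)
assert np.allclose(per_sample, flattened)
```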
8 changes: 4 additions & 4 deletions python/mxnet/gluon/nn/activations.py
@@ -22,7 +22,7 @@

from ... import initializer
from ..block import HybridBlock
from ..utils import _to_classic_arrays, _to_np_arrays
from ...util import is_np_array


class Activation(HybridBlock):
@@ -49,9 +49,9 @@ def _alias(self):
return self._act_type

def hybrid_forward(self, F, x):
x = _to_classic_arrays(x)
out = F.Activation(x, act_type=self._act_type, name='fwd')
return _to_np_arrays(out)
if is_np_array():
F = F.npx
return F.Activation(x, act_type=self._act_type, name='fwd')

def __repr__(self):
s = '{name}({_act_type})'
23 changes: 14 additions & 9 deletions python/mxnet/gluon/nn/basic_layers.py
@@ -26,8 +26,9 @@

from .activations import Activation
from ..block import Block, HybridBlock
from ..utils import _indent, _to_classic_arrays, _to_np_arrays
from ..utils import _indent, _adapt_np_array
from ... import nd, sym
from ...util import is_np_array


class Sequential(Block):
@@ -218,14 +219,13 @@ def __init__(self, units, activation=None, use_bias=True, flatten=True,
self.act = None

def hybrid_forward(self, F, x, weight, bias=None):
# TODO(junwu): This is a temp solution to reuse legacy ops for np.ndarray.
# We should rewrite this with np/npx ops.
x, weight, bias = _to_classic_arrays(x, weight, bias)
if is_np_array():
F = F.npx
act = F.FullyConnected(x, weight, bias, no_bias=bias is None, num_hidden=self._units,
flatten=self._flatten, name='fwd')
if self.act is not None:
act = self.act(act)
return _to_np_arrays(act)
return act

def __repr__(self):
s = '{name}({layout}, {act})'
@@ -265,13 +265,12 @@ def __init__(self, rate, axes=(), **kwargs):
self._rate = rate
self._axes = axes

@_adapt_np_array
def hybrid_forward(self, F, x):
x = _to_classic_arrays(x)
if self._rate > 0:
out = F.Dropout(x, p=self._rate, axes=self._axes, name='fwd', cudnn_off=False)
return F.Dropout(x, p=self._rate, axes=self._axes, name='fwd', cudnn_off=False)
else:
out = F.identity(x)
return _to_np_arrays(out)
return F.identity(x)

def __repr__(self):
s = '{name}(p = {_rate}, axes={_axes})'
@@ -361,6 +360,7 @@ def cast(self, dtype):
dtype = 'float32'
super(BatchNorm, self).cast(dtype)

@_adapt_np_array
def hybrid_forward(self, F, x, gamma, beta, running_mean, running_var):
return F.BatchNorm(x, gamma, beta, running_mean, running_var,
name='fwd', **self._kwargs)
@@ -414,6 +414,7 @@ def __init__(self, input_dim, output_dim, dtype='float32',
init=weight_initializer, dtype=dtype,
allow_deferred_init=True, grad_stype=grad_stype)

@_adapt_np_array
def hybrid_forward(self, F, x, weight):
return F.Embedding(x, weight, name='fwd', **self._kwargs)

@@ -435,6 +436,7 @@ class Flatten(HybridBlock):
def __init__(self, **kwargs):
super(Flatten, self).__init__(**kwargs)

@_adapt_np_array
def hybrid_forward(self, F, x):
return F.Flatten(x)

@@ -520,6 +522,7 @@ def __init__(self, axis=1, epsilon=1e-5, center=True, scale=False,
shape=(in_channels,), init=beta_initializer,
allow_deferred_init=True)

@_adapt_np_array
def hybrid_forward(self, F, x, gamma, beta):
if self._axis == 1:
return F.InstanceNorm(x, gamma, beta,
@@ -608,6 +611,7 @@ def __init__(self, axis=-1, epsilon=1e-5, center=True, scale=True,
shape=(in_channels,), init=beta_initializer,
allow_deferred_init=True)

@_adapt_np_array
def hybrid_forward(self, F, data, gamma, beta):
norm_data = F.LayerNorm(data, gamma=gamma, beta=beta, axis=self._axis, eps=self._epsilon)
return norm_data
@@ -792,6 +796,7 @@ def __init__(self, function, prefix=None):
"Unrecognized function in lambda: {} of type {}"
.format(function, type(function)))

@_adapt_np_array
def hybrid_forward(self, F, x, *args):
return self._func(F, x, *args)

38 changes: 32 additions & 6 deletions python/mxnet/gluon/utils.py
@@ -38,7 +38,7 @@ class requests_failed_to_import(object):
import numpy as np

from .. import ndarray
from ..util import is_np_shape, is_np_array
from ..util import is_np_shape, is_np_array, wraps_safely


def split_data(data, num_slice, batch_axis=0, even_split=True):
@@ -459,7 +459,7 @@ def _check_same_symbol_type(symbols):
'symbols in the list to numpy symbols by calling `as_np_ndarray()` '
'on each of them; if you want classic ndarray output(s) from the '
'computation graph, please convert all the numpy symbols in the list '
'to classic symbols by calling `as_classic_ndarray()` on each of them.')
'to classic symbols by calling `as_nd_ndarray()` on each of them.')
return np_symbol if is_np_sym else classic_symbol


@@ -474,16 +474,24 @@ def _check_all_np_ndarrays(out):
'{}'.format(str(type(array))))


def _to_classic_arrays(*args):
def _to_classic_arrays(*args, **kwargs):
"""Convert arrays to classic arrays. This is used in a Gluon layer for converting
inputs of np arrays to classic arrays so that the layer built with legacy ops can still
be used in np_array semantics."""
from ..numpy import ndarray as np_ndarray
from ..symbol.numpy import _Symbol as np_symbol
num_inputs = len(args)
assert num_inputs != 0
if not is_np_array():
return args[0] if num_inputs == 1 else args
in_arrs = [arr if arr is None else arr.as_classic_ndarray() for arr in args]
return in_arrs[0] if num_inputs == 1 else in_arrs
return args, kwargs
in_arrs = [arr if arr is None else arr.as_nd_ndarray() for arr in args]
new_kwargs = {}
for k, v in kwargs.items():
if isinstance(v, (np_ndarray, np_symbol)):
new_kwargs[k] = v.as_nd_ndarray()
else:
new_kwargs[k] = v
return in_arrs, new_kwargs


def _to_np_arrays(*args):
@@ -496,3 +504,21 @@ def _to_np_arrays(*args):
return args[0] if num_outputs == 1 else args
out = [arr.as_np_ndarray() for arr in args]
return out[0] if num_outputs == 1 else out


# TODO(junwu): This is a temp solution for allowing basic layers
# implemented using legacy ops to accept np.ndarrays as inputs and return
# np.ndarrays as outputs. We should remove it after changing all the layers
# to use np ops in np_array semantics in the future.
def _adapt_np_array(func):
@wraps_safely(func)
def _with_np_array(*args, **kwargs):
assert len(args) > 2, "expect at least three arguments in args"
if is_np_array():
input_args, kwargs = _to_classic_arrays(*args[2:], **kwargs)
input_args = list(args[0:2]) + input_args
out = func(*input_args, **kwargs)
return _to_np_arrays(out)
else:
return func(*args, **kwargs)
return _with_np_array
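For context, a hypothetical use of the new decorator on a custom layer (the layer below is made up for illustration; only `_adapt_np_array`, `HybridBlock`, and the `F.relu` operator are taken as given). The wrapped `hybrid_forward` keeps a pure legacy-op body and pays one conversion per call instead of one per operator:

```python
from mxnet.gluon import HybridBlock
from mxnet.gluon.utils import _adapt_np_array  # internal helper added by this commit

class ScaledRelu(HybridBlock):  # hypothetical layer, for illustration only
    def __init__(self, scale=2.0, **kwargs):
        super(ScaledRelu, self).__init__(**kwargs)
        self._scale = scale

    @_adapt_np_array
    def hybrid_forward(self, F, x):
        # Under np_array semantics, x has already been converted to a classic
        # NDArray/Symbol by the decorator, so legacy ops can be used directly.
        return F.relu(x * self._scale)
```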
4 changes: 2 additions & 2 deletions python/mxnet/ndarray/ndarray.py
@@ -196,7 +196,7 @@ def as_np_ndarray(self):
check_call(_LIB.MXShallowCopyNDArray(self.handle, ctypes.byref(hdl)))
return ndarray(handle=hdl, writable=self.writable)

def as_classic_ndarray(self):
def as_nd_ndarray(self):
"""A convenience function for creating a classic ndarray from the current
ndarray with zero copy. For this class, it just returns itself since it is
already a classic ndarray."""
@@ -962,7 +962,7 @@ def _at(self, idx):
% (idx-length, length))
check_call(_LIB.MXNDArrayAt(
self.handle, mx_uint(idx), ctypes.byref(handle)))
return NDArray(handle=handle, writable=self.writable)
return self.__class__(handle=handle, writable=self.writable)

def reshape(self, *shape, **kwargs):
"""Returns a **view** of this array with a new shape without altering any data.
(Diffs for the remaining 14 changed files are not shown.)