Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
[BUGFIX] fix unknown parameter shapes when np_shape is turned on. (#15097)
Browse files Browse the repository at this point in the history

* fix.

* add test.

* fix test.

* check unknown shape correctly.

* fix test.

* fix.

* fix.

* add more comments.

* add doc.
  • Loading branch information
zheng-da authored and szha committed Jun 2, 2019
1 parent 52f77c9 commit 360f8d0
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 4 deletions.
22 changes: 18 additions & 4 deletions python/mxnet/gluon/parameter.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
from .. import symbol, ndarray, initializer, context
from ..context import Context, cpu
from .. import autograd
from .utils import _indent, _brief_print_list
from .utils import _indent, _brief_print_list, shape_is_known
from .. import is_np_shape

# pylint: disable= invalid-name
tensor_types = (symbol.Symbol, ndarray.NDArray)
Expand Down Expand Up @@ -156,7 +157,20 @@ def grad_req(self, req):

@property
def shape(self):
    """The shape of the parameter.

    By default, an unknown dimension size is 0. However, when the NumPy semantic
    is turned on, unknown dimension size is -1.
    """
    if self._shape is None:
        return None
    if not is_np_shape():
        # Legacy semantics: report the stored shape as-is (0 = unknown dim).
        return self._shape
    # Parameters shouldn't be zero-size. If one of its dimension is 0,
    # it means the parameter isn't initialized. In the NumPy semantics,
    # the unknown dimension should be marked with -1.
    return tuple(-1 if dim == 0 else dim for dim in self._shape)

@shape.setter
def shape(self, new_shape):
Expand Down Expand Up @@ -269,7 +283,7 @@ def _finish_deferred_init(self):
return
init, ctx, default_init, data = self._deferred_init
self._deferred_init = ()
assert self.shape is not None and np.prod(self.shape) > 0, \
assert shape_is_known(self.shape), \
"Cannot initialize Parameter '%s' because it has " \
"invalid shape: %s. Please specify in_units, " \
"in_channels, etc for `Block`s."%(
Expand Down Expand Up @@ -380,7 +394,7 @@ def initialize(self, init=None, ctx=None, default_init=initializer.Uniform(),
ctx = [ctx]
if init is None:
init = default_init if self.init is None else self.init
if not self.shape or np.prod(self.shape) <= 0:
if not shape_is_known(self.shape):
if self._allow_deferred_init:
self._deferred_init = (init, ctx, default_init, None)
return
Expand Down
18 changes: 18 additions & 0 deletions python/mxnet/gluon/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class requests_failed_to_import(object):
import numpy as np

from .. import ndarray
from ..util import is_np_shape

def split_data(data, num_slice, batch_axis=0, even_split=True):
"""Splits an NDArray into `num_slice` slices along `batch_axis`.
Expand Down Expand Up @@ -412,3 +413,20 @@ def __enter__(self):

def __exit__(self, ptype, value, trace):
self.detach()

def shape_is_known(shape):
    """Check whether a shape is completely known with or without np semantics.

    Please see the doc of is_np_shape for more details.
    """
    if shape is None:
        return False
    # The sentinel for "unknown dimension" differs between the two semantics.
    unknown = -1 if is_np_shape() else 0
    if not shape:
        # A zero-dim (scalar) shape is only expressible under np semantics,
        # where the unknown marker is -1; under legacy semantics () is unknown.
        return unknown == -1
    for size in shape:
        if size == unknown:
            return False
        assert size > unknown, "shape dimension size cannot be less than {}, while " \
            "received {}".format(unknown, size)
    return True
4 changes: 4 additions & 0 deletions python/mxnet/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ def is_np_shape():
the shapes of zero-size tensors. This is turned off by default for keeping
backward compatibility.
In the NumPy shape semantics, `-1` indicates an unknown size. For example,
`(-1, 2, 2)` means that the size of the first dimension is unknown. Its size
may be inferred during shape inference.
Please note that this is designed as an infrastructure for the incoming
MXNet-NumPy operators. Legacy operators registered in the modules
`mx.nd` and `mx.sym` are not guaranteed to behave like their counterparts
Expand Down
16 changes: 16 additions & 0 deletions tests/python/unittest/test_gluon.py
Original file line number Diff line number Diff line change
Expand Up @@ -2726,6 +2726,22 @@ def hybrid_forward(self, F, x):
net = Net(act0, act1, shape, slice)
check_layer_forward_withinput(net, x)

@with_seed()
def test_np_shape_parameters():
    """Deferred-init parameters must initialize correctly under np_shape."""

    class Foo(gluon.Block):
        # Dense with in_units unspecified, so its weight shape stays
        # unknown until the first forward pass (deferred initialization).
        def __init__(self, **kwargs):
            super(Foo, self).__init__(**kwargs)
            self.dense = gluon.nn.Dense(16)

        def forward(self, x):
            return self.dense(x)

    with mx.np_shape(True):
        data = mx.nd.zeros((2, 2016))
        print(data.shape)
        block = Foo()
        block.initialize()
        print(block(data).shape)

if __name__ == '__main__':
    # Run this test module directly through nose's test runner.
    import nose
    nose.runmodule()

0 comments on commit 360f8d0

Please sign in to comment.