diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 5b774055d0e6..088117e6d424 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -170,7 +170,7 @@ typedef int (*CustomOpFBFunc)(int /*size*/, void** /*ptrs*/, int* /*tags*/, typedef int (*CustomOpDelFunc)(void* /*state*/); typedef int (*CustomOpListFunc)(char*** /*args*/, void* /*state*/); typedef int (*CustomOpInferShapeFunc)(int /*num_input*/, int* /*ndims*/, - unsigned** /*shapes*/, void* /*state*/); + int** /*shapes*/, void* /*state*/); typedef int (*CustomOpInferStorageTypeFunc)(int /*num_input*/, int* /*stypes*/, void* /*state*/); typedef int (*CustomOpBackwardInferStorageTypeFunc)(int /*num_input*/, int * /*stypes*/, @@ -1036,6 +1036,19 @@ MXNET_DLL int MXAutogradIsRecording(bool* curr); * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXAutogradIsTraining(bool* curr); +/*! + * \brief get whether numpy compatibility is on + * \param curr returns the current status + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIsNumpyCompatible(bool* curr); +/*! + * \brief set numpy compatibility switch + * \param is_np_comp 1 when numpy compatibility is on, 0 when off + * \param prev returns the previous status before this set + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXSetIsNumpyCompatible(int is_np_comp, int* prev); /*! * \brief mark NDArrays as variables to compute gradient for autograd * \param num_var number of variable NDArrays diff --git a/include/mxnet/imperative.h b/include/mxnet/imperative.h index 52cedb2fadd9..ad209913ac53 100644 --- a/include/mxnet/imperative.h +++ b/include/mxnet/imperative.h @@ -97,6 +97,16 @@ class Imperative { is_recording_ = is_recording; return old; } + /*! brief whether numpy compatibility is on. */ + bool is_np_comp() const { + return is_np_comp_; + } + /*! brief turn on or turn off numpy compatibility switch. */ + bool set_is_np_comp(bool is_np_comp) { + bool old = is_np_comp_; + is_np_comp_ = is_np_comp; + return old; + } /*! \brief to record operator, return corresponding node. */ void RecordOp(nnvm::NodeAttrs&& attrs, const std::vector& inputs, @@ -165,9 +175,15 @@ class Imperative { #if DMLC_CXX11_THREAD_LOCAL static thread_local bool is_train_; static thread_local bool is_recording_; + // TOOD(junwu): Added numpy compatibility switch for backward compatibility. + // Delete it in the next major release. + static thread_local bool is_np_comp_; #else static MX_THREAD_LOCAL bool is_train_; static MX_THREAD_LOCAL bool is_recording_; + // TOOD(junwu): Added numpy compatibility switch for backward compatibility. + // Delete it in the next major release. + static MX_THREAD_LOCAL bool is_np_comp_; #endif /*! \brief node count used for naming */ std::atomic node_count_{0}; diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h index 49852f73fbac..d83e843033e3 100644 --- a/include/mxnet/tuple.h +++ b/include/mxnet/tuple.h @@ -607,12 +607,36 @@ class TShape : public Tuple { #endif }; +/*! brief check if a shape's ndim is known. */ +inline bool ndim_is_known(const int ndim) { + CHECK_GE(ndim, -1) << "shape ndim must be >= -1, while received " << ndim; + return ndim != -1; +} + +/*! brief check if a shape's ndim is known. */ +inline bool ndim_is_known(const TShape& x) { + return ndim_is_known(x.ndim()); +} + +/*! brief check if a shape's dim size is known. */ +inline bool dim_size_is_known(const int dim_size) { + CHECK_GE(dim_size, -1) << "shape dim size must be >= -1, while received " << dim_size; + return dim_size != -1; +} + +/*! brief check if a shape's dim size is known. */ +inline bool dim_size_is_known(const TShape& x, const int idx) { + CHECK(idx >= 0 && idx < x.ndim()) + << "idx = " << idx << " exceeds shape dimension range [0, " << x.ndim() << ")"; + return dim_size_is_known(x[idx]); +} + /*! brief check if shape is known using the NumPy compatible definition. * zero-dim and zero-size tensors are valid. -1 means unknown.*/ inline bool shape_is_known(const TShape& x) { - if (x.ndim() == -1) return false; + if (!ndim_is_known(x)) return false; for (int i = 0; i < x.ndim(); ++i) { - if (x[i] == -1) return false; + if (!dim_size_is_known(x, i)) return false; } return true; } diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py index 89fc00801ea3..4eb87ebca62c 100644 --- a/python/mxnet/ndarray/ndarray.py +++ b/python/mxnet/ndarray/ndarray.py @@ -1852,7 +1852,10 @@ def shape(self): pdata = ctypes.POINTER(mx_int)() check_call(_LIB.MXNDArrayGetShape( self.handle, ctypes.byref(ndim), ctypes.byref(pdata))) - return tuple(pdata[:ndim.value]) # pylint: disable=invalid-slice-index + if ndim.value == -1: + return None + else: + return tuple(pdata[:ndim.value]) # pylint: disable=invalid-slice-index @property diff --git a/python/mxnet/numpy/__init__.py b/python/mxnet/numpy/__init__.py index b1139a05791d..e0dfda10113e 100644 --- a/python/mxnet/numpy/__init__.py +++ b/python/mxnet/numpy/__init__.py @@ -17,4 +17,50 @@ # specific language governing permissions and limitations # under the License. +import ctypes +from ..base import _LIB, check_call + __all__ = [] + + +def set_np_comp(is_np_comp): + prev = ctypes.c_int() + check_call(_LIB.MXSetIsNumpyCompatible(ctypes.c_int(is_np_comp), ctypes.byref(prev))) + return bool(prev.value) + + +def is_np_comp(): + curr = ctypes.c_bool() + check_call(_LIB.MXIsNumpyCompatible(ctypes.byref(curr))) + return curr.value + + +class _NumpyCompatibilityStateScope(object): + """Scope for managing numpy compatibility state. + + Example:: + + with _NumpyCompatibilityStateScope(True): + y = model(x) + backward([y]) + + """ + def __init__(self, is_np_comp): #pylint: disable=redefined-outer-name + self._enter_is_np_comp = is_np_comp + self._prev_is_np_comp = None + + def __enter__(self): + if self._enter_is_np_comp is not None: + self._prev_is_np_comp = set_np_comp(self._enter_is_np_comp) + + def __exit__(self, ptype, value, trace): + if self._enter_is_np_comp is not None and self._prev_is_np_comp != self._enter_is_np_comp: + set_np_comp(self._prev_is_np_comp) + + +def enable_np_comp(): + return _NumpyCompatibilityStateScope(True) + + +def disable_np_comp(): + return _NumpyCompatibilityStateScope(False) diff --git a/python/mxnet/operator.py b/python/mxnet/operator.py index e8fa571d44db..2c69b9b46521 100644 --- a/python/mxnet/operator.py +++ b/python/mxnet/operator.py @@ -28,7 +28,7 @@ from ctypes import CFUNCTYPE, POINTER, Structure, pointer from ctypes import c_void_p, c_int, c_char, c_char_p, cast, c_bool -from .base import _LIB, check_call, MXCallbackList, c_array, c_array_buf +from .base import _LIB, check_call, MXCallbackList, c_array, c_array_buf, mx_int from .base import c_str, mx_uint, mx_float, ctypes2numpy_shared, NDArrayHandle, py_str from . import symbol, context from .ndarray import NDArray, _DTYPE_NP_TO_MX, _DTYPE_MX_TO_NP @@ -164,7 +164,7 @@ def get_symbol(self, *args, **kwargs): fb_functype = CFUNCTYPE(None, c_int, POINTER(POINTER(mx_float)), POINTER(c_int), POINTER(POINTER(mx_uint)), POINTER(c_int), c_void_p) infer_functype = CFUNCTYPE(None, c_int, POINTER(c_int), - POINTER(POINTER(mx_uint)), c_void_p) + POINTER(POINTER(mx_int)), c_void_p) list_functype = CFUNCTYPE(None, POINTER(POINTER(POINTER(c_char))), c_void_p) class NumpyOpInfo(Structure): """Structure that holds Callback information. Passed to NumpyOpProp""" @@ -214,9 +214,9 @@ def infer_shape_entry(num_tensor, tensor_dims, assert len(ishape) == n_in rshape = list(ishape) + list(oshape) for i in range(n_in+n_out): - tensor_shapes[i] = cast(c_array_buf(mx_uint, - array('I', rshape[i])), - POINTER(mx_uint)) + tensor_shapes[i] = cast(c_array_buf(mx_int, + array('i', rshape[i])), + POINTER(mx_int)) tensor_dims[i] = len(rshape[i]) def list_outputs_entry(out, _): @@ -266,7 +266,7 @@ def __init__(self, need_top_grad=True): def get_symbol(self, *args, **kwargs): fb_functype = CFUNCTYPE(c_bool, c_int, POINTER(c_void_p), POINTER(c_int), c_void_p) infer_functype = CFUNCTYPE(c_bool, c_int, POINTER(c_int), - POINTER(POINTER(mx_uint)), c_void_p) + POINTER(POINTER(mx_int)), c_void_p) list_functype = CFUNCTYPE(c_bool, POINTER(POINTER(POINTER(c_char))), c_void_p) deps_functype = CFUNCTYPE(c_bool, c_int_p, c_int_p, c_int_p, c_int_p, POINTER(c_int_p), c_void_p) @@ -335,9 +335,9 @@ def infer_shape_entry(num_tensor, tensor_dims, assert len(ishape) == n_in rshape = list(ishape) + list(oshape) for i in range(n_in+n_out): - tensor_shapes[i] = cast(c_array_buf(mx_uint, - array('I', rshape[i])), - POINTER(mx_uint)) + tensor_shapes[i] = cast(c_array_buf(mx_int, + array('i', rshape[i])), + POINTER(mx_int)) tensor_dims[i] = len(rshape[i]) except Exception: print('Error in NDArrayOp.infer_shape: %s' % traceback.format_exc()) @@ -698,7 +698,7 @@ def do_register(prop_cls): del_functype = CFUNCTYPE(c_int, c_void_p) infershape_functype = CFUNCTYPE(c_int, c_int, POINTER(c_int), - POINTER(POINTER(mx_uint)), c_void_p) + POINTER(POINTER(mx_int)), c_void_p) infertype_functype = CFUNCTYPE(c_int, c_int, POINTER(c_int), c_void_p) inferstorage_functype = CFUNCTYPE(c_int, c_int, POINTER(c_int), c_void_p) inferstorage_backward_functype = CFUNCTYPE(c_int, c_int, POINTER(c_int), \ @@ -747,9 +747,9 @@ def infer_shape_entry(num_tensor, tensor_dims, "shapes, got %d."%(n_aux, len(ashape)) rshape = list(ishape) + list(oshape) + list(ashape) for i in range(n_in+n_out+n_aux): - tensor_shapes[i] = cast(c_array_buf(mx_uint, - array('I', rshape[i])), - POINTER(mx_uint)) + tensor_shapes[i] = cast(c_array_buf(mx_int, + array('i', rshape[i])), + POINTER(mx_int)) tensor_dims[i] = len(rshape[i]) infer_shape_entry._ref_holder = [tensor_shapes] diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py index 01b851c43f88..73d4babe6f27 100644 --- a/python/mxnet/symbol/symbol.py +++ b/python/mxnet/symbol/symbol.py @@ -42,6 +42,7 @@ from ..ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID from ..ndarray import _ndarray_cls from ..executor import Executor +from ..numpy import is_np_comp from . import _internal from . import op from ._internal import SymbolBase, _set_symbol_class @@ -1078,7 +1079,11 @@ def infer_shape(self, *args, **kwargs): arg_names = self.list_arguments() unknowns = [] for name, shape in zip(arg_names, arg_shapes): - if not shape or not _numpy.prod(shape): + if is_np_comp(): + shape_is_none = not shape or -1 in shape + else: + shape_is_none = not shape or 0 in shape + if shape_is_none: if len(unknowns) >= 10: unknowns.append('...') break @@ -1204,12 +1209,15 @@ def _infer_shape_impl(self, partial, *args, **kwargs): ctypes.byref(aux_shape_data), ctypes.byref(complete))) if complete.value != 0: - arg_shapes = [ - tuple(arg_shape_data[i][:arg_shape_ndim[i]]) for i in range(arg_shape_size.value)] - out_shapes = [ - tuple(out_shape_data[i][:out_shape_ndim[i]]) for i in range(out_shape_size.value)] - aux_shapes = [ - tuple(aux_shape_data[i][:aux_shape_ndim[i]]) for i in range(aux_shape_size.value)] + arg_shapes = [tuple(arg_shape_data[i][:arg_shape_ndim[i]]) + if arg_shape_ndim[i] >= 0 else None + for i in range(arg_shape_size.value)] + out_shapes = [tuple(out_shape_data[i][:out_shape_ndim[i]]) + if out_shape_ndim[i] >= 0 else None + for i in range(out_shape_size.value)] + aux_shapes = [tuple(aux_shape_data[i][:aux_shape_ndim[i]]) + if aux_shape_ndim[i] >= 0 else None + for i in range(aux_shape_size.value)] return (arg_shapes, out_shapes, aux_shapes) else: return (None, None, None) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index dd5dc8d4c491..69468f33ed07 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -44,9 +44,11 @@ #include "mxnet/rtc.h" #include "mxnet/storage.h" #include "mxnet/libinfo.h" +#include "mxnet/imperative.h" #include "./c_api_common.h" #include "../operator/custom/custom-inl.h" #include "../operator/tensor/matrix_op-inl.h" +#include "../common/utils.h" using namespace mxnet; @@ -499,15 +501,23 @@ int MXNDArrayGetShape(NDArrayHandle handle, API_BEGIN(); NDArray *arr = static_cast(handle); if (!arr->is_none()) { - const mxnet::TShape &s = arr->shape(); + mxnet::TShape s = arr->shape(); + if (!Imperative::Get()->is_np_comp()) { + common::ConvertToLegacyShape(&s); + } *out_dim = s.ndim(); - CHECK_GE(s.ndim(), 0); - std::vector& buffer = ret->arg_shape_buffer; - buffer.resize(s.ndim()); - mxnet::ShapeTypeCast(s.begin(), s.end(), buffer.data()); - *out_pdata = buffer.data(); + if (s.ndim() >= 0) { + std::vector &buffer = ret->arg_shape_buffer; + buffer.resize(s.ndim()); + mxnet::ShapeTypeCast(s.begin(), s.end(), buffer.data()); + *out_pdata = buffer.data(); + } } else { - *out_dim = 0; + if (Imperative::Get()->is_np_comp()) { + *out_dim = -1; + } else { + *out_dim = 0; + } } API_END(); } diff --git a/src/c_api/c_api_common.h b/src/c_api/c_api_common.h index 690a1eae1055..329dc9adc7cf 100644 --- a/src/c_api/c_api_common.h +++ b/src/c_api/c_api_common.h @@ -91,7 +91,7 @@ struct MXAPIThreadLocalEntry { data->resize(shapes.size()); size_t size = 0; for (const auto& s : shapes) { - CHECK_GE(s.ndim(), 0); + if (s.ndim() > 0); size += s.ndim(); } buffer->resize(size); diff --git a/src/c_api/c_api_executor.cc b/src/c_api/c_api_executor.cc index d0006383bdc8..fc59463ab689 100644 --- a/src/c_api/c_api_executor.cc +++ b/src/c_api/c_api_executor.cc @@ -25,8 +25,10 @@ #include #include #include +#include #include "./c_api_common.h" #include "../executor/graph_executor.h" +#include "../common/utils.h" #if MXNET_USE_TENSORRT #include "../executor/trt_graph_executor.h" #endif // MXNET_USE_TENSORRT @@ -416,6 +418,11 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle, CHECK(p.second) << "Duplicate shapes are provided for argument " << provided_arg_shape_names[i] << " in simple_bind"; } + if (!Imperative::Get()->is_np_comp()) { + for (auto &kv : arg_shape_map) { + common::ConvertToNumpyShape(&kv.second); + } + } // create para name set for sharing data array memory std::unordered_set shared_arg_name_set(num_shared_arg_names); diff --git a/src/c_api/c_api_ndarray.cc b/src/c_api/c_api_ndarray.cc index 18f6c411e039..0e136b03ecd7 100644 --- a/src/c_api/c_api_ndarray.cc +++ b/src/c_api/c_api_ndarray.cc @@ -276,6 +276,18 @@ int MXAutogradSetIsRecording(int is_recording, int* prev) { API_END(); } +int MXIsNumpyCompatible(bool* curr) { + API_BEGIN(); + *curr = Imperative::Get()->is_np_comp(); + API_END(); +} + +int MXSetIsNumpyCompatible(int is_np_comp, int* prev) { + API_BEGIN(); + *prev = Imperative::Get()->set_is_np_comp(static_cast(is_np_comp)); + API_END(); +} + int MXAutogradMarkVariables(mx_uint num_var, NDArrayHandle *var_handles, mx_uint *reqs_array, diff --git a/src/c_api/c_api_symbolic.cc b/src/c_api/c_api_symbolic.cc index 8b961f46c39e..98034417abae 100644 --- a/src/c_api/c_api_symbolic.cc +++ b/src/c_api/c_api_symbolic.cc @@ -24,6 +24,7 @@ */ #include "mxnet/base.h" #include "mxnet/c_api.h" +#include "mxnet/imperative.h" #include "nnvm/c_api.h" #include "nnvm/pass.h" #include "nnvm/pass_functions.h" @@ -543,8 +544,14 @@ int MXSymbolInferShape(SymbolHandle sym, throw dmlc::Error(err.msg); } + // if use legacy shape definition, need to convert numpy shape to legacy shape + mxnet::ShapeVector shapes = g.GetAttr("shape"); + if (!Imperative::Get()->is_np_comp()) { + common::ConvertToLegacyShape(&shapes); + } + // copy back - CopyAttr(g.indexed_graph(), g.GetAttr("shape"), + CopyAttr(g.indexed_graph(), shapes, &(ret->arg_shapes), &(ret->out_shapes), &(ret->aux_shapes)); // copy data back diff --git a/src/common/utils.h b/src/common/utils.h index 8e6966952890..f3df2e15ec32 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -734,6 +734,60 @@ inline void ParallelCopy(DType* dst, const DType* src, index_t size) { } } +/*! + * \brief If numpy compatibility is turned off (default), the shapes passed in + * by users follow the legacy shape definition: + * 1. 0 ndim means the shape is completely unknown. + * 2. 0 dim size means the dim size is unknown. + * We need to convert those shapes to use the numpy shape definition: + * 1. 0 ndim means it's a scalar tensor. + * 2. -1 ndim means the shape is unknown. + * 3. 0 dim size means no elements in that dimension. + * 4. -1 dim size means the dimension's size is unknown. + * so that operator's infer shape function can work in backend. + * \param shape to be converted. + */ +inline void ConvertToNumpyShape(mxnet::TShape* shape) { + if (shape->ndim() == 0) { // legacy shape ndim = 0 means unknown + *shape = mxnet::TShape(); // unknown shape ndim = -1 + } else { + for (int j = 0; j < shape->ndim(); ++j) { + CHECK_GE((*shape)[j], 0) << "Legacy shape cannot have dim size < 0"; + if ((*shape)[j] == 0) { // legacy shape dim_size = 0 means unknown + (*shape)[j] = -1; // unknown dim size = -1 + } + } + } +} + +inline void ConvertToNumpyShape(mxnet::ShapeVector* shapes) { + for (size_t i = 0; i < shapes->size(); ++i) { + ConvertToNumpyShape(&(shapes->at(i))); + } +} + +/*! + * \brief This is function is used to convert shapes returned by + * the infer shape functions/pass to the legacy shape definition. + */ +inline void ConvertToLegacyShape(mxnet::TShape* shape) { + if (!mxnet::ndim_is_known(*shape)) { + *shape = mxnet::TShape(0); + } else { + for (int j = 0; j < shape->ndim(); ++j) { + if (!mxnet::dim_size_is_known(*shape, j)) { + (*shape)[j] = 0; + } + } + } +} + +inline void ConvertToLegacyShape(mxnet::ShapeVector* shapes) { + for (size_t i = 0; i < shapes->size(); ++i) { + ConvertToLegacyShape(&(shapes->at(i))); + } +} + } // namespace common } // namespace mxnet #endif // MXNET_COMMON_UTILS_H_ diff --git a/src/executor/infer_graph_attr_pass.cc b/src/executor/infer_graph_attr_pass.cc index 3a5c5ab9806f..fa7aee518486 100644 --- a/src/executor/infer_graph_attr_pass.cc +++ b/src/executor/infer_graph_attr_pass.cc @@ -24,6 +24,7 @@ #include #include +#include #include "./exec_pass.h" #include "../operator/operator_common.h" #include "../common/exec_utils.h" @@ -467,6 +468,12 @@ nnvm::Graph InferShapeAttr(nnvm::Graph &&ret, std::vector ishape, oshape; // whether a shape is dynamic std::vector is_dynamic(rshape.size(), 0); + + // convert to numpy compatible shape to use operator's infer shape function + if (!Imperative::Get()->is_np_comp()) { + common::ConvertToNumpyShape(&rshape); + } + // inference step function for nid auto infer_step = [&](uint32_t nid, bool last_iter) { const auto& inode = idx[nid]; @@ -483,6 +490,9 @@ nnvm::Graph InferShapeAttr(nnvm::Graph &&ret, if (it != inode.source->attrs.dict.end()) { std::istringstream is(it->second); CHECK(is >> rshape[out_ent_id]) << "Invalid attribute"; + if (!Imperative::Get()->is_np_comp()) { + common::ConvertToNumpyShape(&rshape[out_ent_id]); + } } } // assign a default value to node attribute @@ -546,7 +556,7 @@ nnvm::Graph InferShapeAttr(nnvm::Graph &&ret, bool is_input_dynamic_shape = false; for (uint32_t i = 0; i < ishape.size(); ++i) { ishape[i] = rshape[idx.entry_id(inode.inputs[i])]; - if (ishape[i].ndim() == 0 && is_dynamic[idx.entry_id(inode.inputs[i])]) { + if (!mxnet::ndim_is_known(ishape[i]) && is_dynamic[idx.entry_id(inode.inputs[i])]) { is_input_dynamic_shape = true; } if (fis_none(ishape[i])) forward_known = false; @@ -563,7 +573,7 @@ nnvm::Graph InferShapeAttr(nnvm::Graph &&ret, auto finfer = finfer_shape.get(inode.source->op(), fdefault); if (finfer == nullptr || is_input_dynamic_shape) { for (uint32_t i = 0; i < oshape.size(); ++i) { - if (oshape[i].ndim() == 0) { + if (!mxnet::ndim_is_known(oshape[i].ndim())) { is_dynamic[idx.entry_id(nid, i)] = 1; } } @@ -650,12 +660,12 @@ nnvm::Graph InferShape(nnvm::Graph&& graph, "shape", "shape_num_unknown_nodes", [](const mxnet::TShape& s) { return !mxnet::shape_is_known(s); }, [](const mxnet::TShape& s) { - if (s.ndim() == -1) { + if (!mxnet::ndim_is_known(s)) { return static_cast(1); } size_t ret = 0; for (const auto& val : s) { - if (val == -1) { + if (!mxnet::dim_size_is_known(val)) { ++ret; } } diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc index 3e5b3987522c..b027de0a0f6f 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -25,9 +25,11 @@ namespace mxnet { #if DMLC_CXX11_THREAD_LOCAL thread_local bool Imperative::is_train_ = false; thread_local bool Imperative::is_recording_ = false; +thread_local bool Imperative::is_np_comp_ = false; #else MX_THREAD_LOCAL bool Imperative::is_train_ = false; MX_THREAD_LOCAL bool Imperative::is_recording_ = false; +MX_THREAD_LOCAL bool Imperative::is_np_comp_ = false; #endif Imperative* Imperative::Get() { @@ -109,7 +111,7 @@ OpStatePtr Imperative::Invoke( OpStatePtr ret = InvokeOp(ctx, attrs, inputs, outputs, req, dispatch_mode); // the followinng loop is used for finding out the correct shape when some shapes are dynamic for (size_t i = 0; i < outputs.size(); i++) { - if (outputs[i]->shape().ndim() == 0) { + if (!shape_is_known(outputs[i]->shape())) { // the WaitToRead overhead here does not seem to be avoidable outputs[i]->WaitToRead(); outputs[i]->SetShapeFromChunk(); diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h index 071f4fa9dd0b..6864428d2559 100644 --- a/src/imperative/imperative_utils.h +++ b/src/imperative/imperative_utils.h @@ -121,7 +121,24 @@ inline void SetShapeType(const Context& ctx, if (!infershape.count(attrs.op)) { is_dynamic_shape_existing = true; } else { - CHECK(infershape[attrs.op](attrs, &in_shapes, &out_shapes)); + if (!Imperative::Get()->is_np_comp()) { + common::ConvertToNumpyShape(&in_shapes); + common::ConvertToNumpyShape(&out_shapes); + } + const bool success = infershape[attrs.op](attrs, &in_shapes, &out_shapes); + if (!success) { + std::stringstream os; + os << "Operator " << attrs.op->name << " inferring shapes failed.\n"; + os << "input shapes:\n"; + for (auto& nd : inputs) { + os << nd->shape() << '\n'; + } + os << "output shapes:\n"; + for (auto& nd : outputs) { + os << nd->shape() << '\n'; + } + LOG(FATAL) << os.str(); + } CHECK_EQ(out_shapes.size(), outputs.size()); } // infer type diff --git a/src/operator/batch_norm_v1-inl.h b/src/operator/batch_norm_v1-inl.h index 8016510090ab..89412357ac67 100644 --- a/src/operator/batch_norm_v1-inl.h +++ b/src/operator/batch_norm_v1-inl.h @@ -261,7 +261,7 @@ class BatchNormV1Prop : public OperatorProperty { using namespace mshadow; CHECK_EQ(in_shape->size(), 3U) << "Input:[data, gamma, beta]"; const mxnet::TShape &dshape = in_shape->at(0); - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; in_shape->at(1) = mxnet::TShape(Shape1(dshape[1])); in_shape->at(2) = mxnet::TShape(Shape1(dshape[1])); out_shape->clear(); diff --git a/src/operator/contrib/multi_proposal-inl.h b/src/operator/contrib/multi_proposal-inl.h index a9afb8e4114e..4d278fb40645 100644 --- a/src/operator/contrib/multi_proposal-inl.h +++ b/src/operator/contrib/multi_proposal-inl.h @@ -108,7 +108,7 @@ class MultiProposalProp : public OperatorProperty { using namespace mshadow; CHECK_EQ(in_shape->size(), 3) << "Input:[cls_prob, bbox_pred, im_info]"; const mxnet::TShape &dshape = in_shape->at(proposal::kClsProb); - if (!mxnet::op::shape_is_none(dshape)) return false; + if (mxnet::op::shape_is_none(dshape)) return false; Shape<4> bbox_pred_shape; bbox_pred_shape = Shape4(dshape[0], dshape[1] * 2, dshape[2], dshape[3]); SHAPE_ASSIGN_CHECK(*in_shape, proposal::kBBoxPred, diff --git a/src/operator/control_flow.cc b/src/operator/control_flow.cc index 9ba3b5471c60..4c0d67bb08f7 100644 --- a/src/operator/control_flow.cc +++ b/src/operator/control_flow.cc @@ -301,7 +301,7 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs, for (int i = 0; i < params.num_out_data; i++) { mxnet::TShape shape = subg_out_shape[i]; // If we don't have shape info, we don't need to do anything. - if (!shape_is_known(shape)) + if (!mxnet::ndim_is_known(shape)) continue; subg_out_shape[i] = SliceFirstDim(shape); } @@ -317,7 +317,7 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs, for (int i = 0; i < params.num_out_data; i++) { // If the output shape isn't inferred, we don't need to propogate the info. const auto& g_out_shape = subg_out_shape[i]; - if (!shape_is_known(g_out_shape)) + if (!mxnet::ndim_is_known(g_out_shape)) continue; auto out = mxnet::TShape(g_out_shape.ndim() + 1, -1); @@ -336,7 +336,7 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs, const auto &shape = subg_in_shape[loc]; // If the input data shape isn't inferred, we don't need to propogate the // info. - if (!shape_is_known(shape)) + if (!mxnet::ndim_is_known(shape)) continue; if (data_1d[i]) { diff --git a/src/operator/convolution_v1-inl.h b/src/operator/convolution_v1-inl.h index 0d6ffd7e895e..080c718dc9bf 100644 --- a/src/operator/convolution_v1-inl.h +++ b/src/operator/convolution_v1-inl.h @@ -405,7 +405,7 @@ class ConvolutionV1Prop : public OperatorProperty { // CHECK_EQ(out_shape->size(), 1) << "Output: [output]"; out_shape->resize(1, mxnet::TShape()); const mxnet::TShape &dshp = (*in_shape)[conv_v1::kData]; - if (!shape_is_known(dshp)) return false; + if (!mxnet::ndim_is_known(dshp)) return false; if (param_.kernel.ndim() == 2) { // 2d conv_v1 CHECK_EQ(dshp.ndim(), 4U) \ diff --git a/src/operator/custom/custom.cc b/src/operator/custom/custom.cc index 46249c9bbcc6..412bfa1bc3aa 100644 --- a/src/operator/custom/custom.cc +++ b/src/operator/custom/custom.cc @@ -128,17 +128,21 @@ bool InferShape(const NodeAttrs& attrs, const CustomParam& params = nnvm::get(attrs.parsed); size_t total = params.num_args + params.num_outs + params.num_auxs; - std::vector shapes(total); + std::vector shapes(total); std::vector ndims(total); size_t buff_size = 0; - for (const auto& i : *in_shape) buff_size += i.ndim(); - std::vector buff(buff_size); - uint32_t *ptr = buff.data(); + for (const auto& i : *in_shape) { + if (i.ndim() > 0) { + buff_size += i.ndim(); + } + } + std::vector buff(buff_size); + int *ptr = buff.data(); for (size_t i = 0; i < in_shape->size(); ++i) { shapes[i] = ptr; ndims[i] = (*in_shape)[i].ndim(); - for (size_t j = 0; j < (*in_shape)[i].ndim(); ++j, ++ptr) { - *ptr = static_cast((*in_shape)[i][j]); + for (int j = 0; j < (*in_shape)[i].ndim(); ++j, ++ptr) { + *ptr = (*in_shape)[i][j]; } } @@ -263,7 +267,7 @@ OpStatePtr CreateState(const NodeAttrs& attrs, Context ctx, for (size_t i = 0; i < in_shape.size(); ++i) { shapes[i] = ptr; ndims[i] = in_shape[i].ndim(); - for (size_t j = 0; j < in_shape[i].ndim(); ++j, ++ptr) { + for (int j = 0; j < in_shape[i].ndim(); ++j, ++ptr) { *ptr = static_cast(in_shape[i][j]); } } diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc index 590b1b428023..622952cc4bc5 100644 --- a/src/operator/nn/batch_norm.cc +++ b/src/operator/nn/batch_norm.cc @@ -332,7 +332,7 @@ static bool BatchNormShape(const nnvm::NodeAttrs& attrs, const int channelCount = dshape[channelAxis]; - if (!shape_is_known(dshape)) { + if (!mxnet::ndim_is_known(dshape)) { return false; } diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc index 5435bd815334..b534ee58e85c 100644 --- a/src/operator/nn/concat.cc +++ b/src/operator/nn/concat.cc @@ -46,7 +46,7 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, mxnet::TShape tmp = (*in_shape)[i]; if (tmp.ndim() > 0) { axis = CheckAxis(param_.dim, tmp.ndim()); - has_unknown_dim_size = tmp[axis] == -1 || has_unknown_dim_size; + has_unknown_dim_size = !mxnet::dim_size_is_known(tmp, axis) || has_unknown_dim_size; size += tmp[axis]; tmp[axis] = -1; shape_assign(&dshape, tmp); @@ -91,26 +91,27 @@ static bool RNNParamConcatShape(const nnvm::NodeAttrs& attrs, int axis = -1; for (int i = 0; i < param_.num_args; ++i) { mxnet::TShape tmp = (*in_shape)[i]; - if (tmp.ndim()) { + if (tmp.ndim() > 0) { axis = CheckAxis(param_.dim, tmp.ndim()); - if (tmp[axis] == 0) { + if (!mxnet::dim_size_is_known(tmp, axis)) { zero_indices.emplace_back(i); } else { + CHECK_GE(tmp[axis], 0); size += tmp[axis]; } - tmp[axis] = 0; + tmp[axis] = -1; shape_assign(&dshape, tmp); } } mxnet::TShape tmp = (*out_shape)[0]; - if (tmp.ndim()) { + if (tmp.ndim() > 0) { axis = CheckAxis(param_.dim, tmp.ndim()); - tmp[axis] = 0; + tmp[axis] = -1; shape_assign(&dshape, tmp); } - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; for (int i = 0; i < param_.num_args; ++i) { CHECK(shape_assign(&(*in_shape)[i], dshape)) @@ -120,21 +121,21 @@ static bool RNNParamConcatShape(const nnvm::NodeAttrs& attrs, if (zero_indices.empty()) dshape[axis] = size; CHECK(shape_assign(&(*out_shape)[0], dshape)) << "Incompatible output shape: expected " << dshape << ", got " << (*out_shape)[0]; - if ((*out_shape)[0][axis] != 0 && !zero_indices.empty()) { + if ((*out_shape)[0][axis] != -1 && !zero_indices.empty()) { int residual = (*out_shape)[0][axis] - size; CHECK_GE(residual, 0) << "Input size already exceeds output size. Residual: " << residual; - CHECK(zero_indices.size() <= 2 && zero_indices.size() >= 0) + CHECK(zero_indices.size() <= 2 && zero_indices.size() > 0) << "Expecting 1 or 2 inputs that need shape inference. Got: " << zero_indices.size(); - bool need_infer = !(*out_shape)[0].Size(); + bool need_infer = !shape_is_known((*out_shape)[0]); for (int i : zero_indices) { (*in_shape)[i][axis] = residual / zero_indices.size(); - need_infer = need_infer || !(*in_shape)[i].Size(); + need_infer = need_infer || !shape_is_known((*in_shape)[i]); } return !need_infer; } - return dshape.Size() != 0; + return shape_is_known(dshape); } static bool ConcatType(const nnvm::NodeAttrs& attrs, diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc index dfbc89de7b0e..536e9a731171 100644 --- a/src/operator/nn/convolution.cc +++ b/src/operator/nn/convolution.cc @@ -96,24 +96,28 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, // CHECK_EQ(out_shape->size(), 1) << "Output: [output]"; out_shape->resize(1, mxnet::TShape()); const mxnet::TShape &dshp = (*in_shape)[conv::kData]; - if (!shape_is_known(dshp)) return false; + if (!mxnet::ndim_is_known(dshp)) return false; if (param_.kernel.ndim() == 1) { // 1d conv CHECK_EQ(dshp.ndim(), 3U) << "Input data should be 3D in batch-num_filter-x"; Shape<3> dshape = ConvertLayout(dshp.get<3>(), param_.layout.value(), kNCW); - Shape<3> wshape = Shape3(param_.num_filter / param_.num_group, dshape[1] / param_.num_group, + Shape<3> wshape = Shape3(param_.num_filter / param_.num_group, + mxnet::dim_size_is_known(dshape, 1) ? dshape[1] / param_.num_group : -1, param_.kernel[0]); wshape = ConvertLayout(wshape, kNCW, param_.layout.value()); - wshape[0] *= param_.num_group; + if (wshape[0] >= 0) { + wshape[0] *= param_.num_group; + } SHAPE_ASSIGN_CHECK(*in_shape, conv::kWeight, wshape); if (!param_.no_bias) { SHAPE_ASSIGN_CHECK(*in_shape, conv::kBias, Shape1(param_.num_filter)); } const index_t dilated_ksize_x = param_.DilatedKernelSize(0); - CHECK_EQ(dshape[1] % param_.num_group, 0U) \ - << "input num_filter must divide group size"; + if (dshape[1] != -1) { + CHECK_EQ(dshape[1] % param_.num_group, 0U) << "input num_filter must divide group size"; + } CHECK_EQ(param_.num_filter % param_.num_group, 0U) \ << "output num_filter must divide group size"; CHECK_GT(param_.kernel.Size(), 0U) \ @@ -125,21 +129,21 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, Shape<3> oshape; oshape[0] = dshape[0]; oshape[1] = param_.num_filter; - oshape[2] = dshape[2] ? - (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_x) / param_.stride[0] + 1 : 0; + oshape[2] = dshape[2] != -1 ? + (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_x) / param_.stride[0] + 1 : -1; SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCW, param_.layout.value())); // Perform incomplete shape inference. Fill in the missing values in data shape. // 1) We can always fill in the batch_size. // 2) We can back-calculate the input height/width if the corresponding stride is 1. oshape = ConvertLayout((*out_shape)[0].get<3>(), param_.layout.value(), kNCW); dshape[0] = oshape[0]; - if (oshape[2] && param_.stride[0] == 1) { + if (oshape[2] != -1 && param_.stride[0] == 1) { dshape[2] = oshape[2] + dilated_ksize_x - 1 - 2 * param_.pad[0]; } SHAPE_ASSIGN_CHECK(*in_shape, conv::kData, ConvertLayout(dshape, kNCW, param_.layout.value())); // Check whether the kernel sizes are valid - if (dshape[2] != 0) { + if (dshape[2] != -1) { CHECK_LE(dilated_ksize_x, AddPad(dshape[2], param_.pad[0])) << "kernel size exceed input"; } return true; @@ -149,10 +153,12 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, << "Input data should be 4D in batch-num_filter-y-x"; Shape<4> dshape = ConvertLayout(dshp.get<4>(), param_.layout.value(), kNCHW); Shape<4> wshape = Shape4(param_.num_filter / param_.num_group, - dshape[1] / param_.num_group, + mxnet::dim_size_is_known(dshape, 1) ? dshape[1] / param_.num_group : -1, param_.kernel[0], param_.kernel[1]); wshape = ConvertLayout(wshape, kNCHW, param_.layout.value()); - wshape[0] *= param_.num_group; + if (wshape[0] >= 0) { + wshape[0] *= param_.num_group; + } SHAPE_ASSIGN_CHECK(*in_shape, conv::kWeight, wshape); if (!param_.no_bias) { SHAPE_ASSIGN_CHECK(*in_shape, conv::kBias, Shape1(param_.num_filter)); @@ -160,8 +166,9 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, const index_t dilated_ksize_y = param_.DilatedKernelSize(0); const index_t dilated_ksize_x = param_.DilatedKernelSize(1); - CHECK_EQ(dshape[1] % param_.num_group, 0U) \ - << "input num_filter must divide group size"; + if (dshape[1] != -1) { + CHECK_EQ(dshape[1] % param_.num_group, 0U) << "input num_filter must divide group size"; + } CHECK_EQ(param_.num_filter % param_.num_group, 0U) \ << "output num_filter must divide group size"; CHECK_GT(param_.kernel.Size(), 0U) \ @@ -173,29 +180,29 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, Shape<4> oshape; oshape[0] = dshape[0]; oshape[1] = param_.num_filter; - oshape[2] = dshape[2] ? - (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_y) / param_.stride[0] + 1 : 0; - oshape[3] = dshape[3] ? - (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_x) / param_.stride[1] + 1 : 0; + oshape[2] = dshape[2] != -1 ? + (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_y) / param_.stride[0] + 1 : -1; + oshape[3] = dshape[3] != -1 ? + (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_x) / param_.stride[1] + 1 : -1; SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCHW, param_.layout.value())); // Perform incomplete shape inference. Fill in the missing values in data shape. // 1) We can always fill in the batch_size. // 2) We can back-calculate the input height/width if the corresponding stride is 1. oshape = ConvertLayout((*out_shape)[0].get<4>(), param_.layout.value(), kNCHW); dshape[0] = oshape[0]; - if (oshape[2] && param_.stride[0] == 1) { + if (oshape[2] != -1 && param_.stride[0] == 1) { dshape[2] = oshape[2] + dilated_ksize_y - 1 - 2 * param_.pad[0]; } - if (oshape[3] && param_.stride[1] == 1) { + if (oshape[3] != -1 && param_.stride[1] == 1) { dshape[3] = oshape[3] + dilated_ksize_x - 1 - 2 * param_.pad[1]; } SHAPE_ASSIGN_CHECK(*in_shape, conv::kData, ConvertLayout(dshape, kNCHW, param_.layout.value())); // Check whether the kernel sizes are valid - if (dshape[2] != 0) { + if (dshape[2] != -1) { CHECK_LE(dilated_ksize_y, AddPad(dshape[2], param_.pad[0])) << "kernel size exceed input"; } - if (dshape[3] != 0) { + if (dshape[3] != -1) { CHECK_LE(dilated_ksize_x, AddPad(dshape[3], param_.pad[1])) << "kernel size exceed input"; } return true; @@ -204,10 +211,13 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(dshp.ndim(), 5U) \ << "Input data should be 5D in batch-num_filter-depth-y-x"; Shape<5> dshape = ConvertLayout(dshp.get<5>(), param_.layout.value(), kNCDHW); - Shape<5> wshape = Shape5(param_.num_filter / param_.num_group, dshape[1] / param_.num_group, + Shape<5> wshape = Shape5(param_.num_filter / param_.num_group, + mxnet::dim_size_is_known(dshape, 1) ? dshape[1] / param_.num_group : -1, param_.kernel[0], param_.kernel[1], param_.kernel[2]); wshape = ConvertLayout(wshape, kNCDHW, param_.layout.value()); - wshape[0] *= param_.num_group; + if (wshape[0] >= 0) { + wshape[0] *= param_.num_group; + } SHAPE_ASSIGN_CHECK(*in_shape, conv::kWeight, wshape); if (!param_.no_bias) { SHAPE_ASSIGN_CHECK(*in_shape, conv::kBias, Shape1(param_.num_filter)); @@ -218,8 +228,9 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, const index_t dilated_ksize_d = param_.DilatedKernelSize(0); const index_t dilated_ksize_y = param_.DilatedKernelSize(1); const index_t dilated_ksize_x = param_.DilatedKernelSize(2); - CHECK_EQ(dshape[1] % param_.num_group, 0U) - << "input num_filter must divide group size"; + if (dshape[1] >= 0) { + CHECK_EQ(dshape[1] % param_.num_group, 0U) << "input num_filter must divide group size"; + } CHECK_EQ(param_.num_filter % param_.num_group, 0U) << "output num_filter must divide group size"; CHECK_GT(param_.kernel.Size(), 0U) \ @@ -233,37 +244,37 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, Shape<5> oshape; oshape[0] = dshape[0]; oshape[1] = param_.num_filter; - oshape[2] = dshape[2] ? - (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_d) / param_.stride[0] + 1 : 0; - oshape[3] = dshape[3] ? - (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_y) / param_.stride[1] + 1 : 0; - oshape[4] = dshape[4] ? - (AddPad(dshape[4], param_.pad[2]) - dilated_ksize_x) / param_.stride[2] + 1 : 0; + oshape[2] = dshape[2] != -1 ? + (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_d) / param_.stride[0] + 1 : -1; + oshape[3] = dshape[3] != -1 ? + (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_y) / param_.stride[1] + 1 : -1; + oshape[4] = dshape[4] != -1 ? + (AddPad(dshape[4], param_.pad[2]) - dilated_ksize_x) / param_.stride[2] + 1 : -1; SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCDHW, param_.layout.value())); // Perform incomplete shape inference. Fill in the missing values in data shape. // 1) We can always fill in the batch_size. // 2) We can back-calculate the input depth/height/width if the corresponding stride is 1. oshape = ConvertLayout((*out_shape)[0].get<5>(), param_.layout.value(), kNCDHW); dshape[0] = oshape[0]; - if (oshape[2] && param_.stride[0] == 1) { + if (oshape[2] != -1 && param_.stride[0] == 1) { dshape[2] = oshape[2] + dilated_ksize_d - 1 - 2 * param_.pad[0]; } - if (oshape[3] && param_.stride[1] == 1) { + if (oshape[3] != -1 && param_.stride[1] == 1) { dshape[3] = oshape[3] + dilated_ksize_y - 1 - 2 * param_.pad[1]; } - if (oshape[4] && param_.stride[2] == 1) { + if (oshape[4] != -1 && param_.stride[2] == 1) { dshape[4] = oshape[4] + dilated_ksize_x - 1 - 2 * param_.pad[2]; } SHAPE_ASSIGN_CHECK(*in_shape, conv::kData, ConvertLayout(dshape, kNCDHW, param_.layout.value())); // Check whether the kernel sizes are valid - if (dshape[2] != 0) { + if (dshape[2] != -1) { CHECK_LE(dilated_ksize_d, AddPad(dshape[2], param_.pad[0])) << "kernel size exceed input"; } - if (dshape[3] != 0) { + if (dshape[3] != -1) { CHECK_LE(dilated_ksize_y, AddPad(dshape[3], param_.pad[1])) << "kernel size exceed input"; } - if (dshape[4] != 0) { + if (dshape[4] != -1) { CHECK_LE(dilated_ksize_x, AddPad(dshape[4], param_.pad[2])) << "kernel size exceed input"; } return true; diff --git a/src/operator/nn/cudnn/cudnn_batch_norm.cc b/src/operator/nn/cudnn/cudnn_batch_norm.cc index 1df888e4b38a..cb35ce170e8e 100644 --- a/src/operator/nn/cudnn/cudnn_batch_norm.cc +++ b/src/operator/nn/cudnn/cudnn_batch_norm.cc @@ -37,7 +37,7 @@ static bool BatchNormShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *in_ using namespace mshadow; CHECK_EQ(in_shape->size(), 5U) << "Input:[data, gamma, beta, moving_mean, moving_var]"; const mxnet::TShape &dshape = in_shape->at(0); - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; in_shape->at(1) = mxnet::TShape(Shape1(dshape[1])); in_shape->at(2) = mxnet::TShape(Shape1(dshape[1])); in_shape->at(3) = mxnet::TShape(Shape1(dshape[1])); diff --git a/src/operator/nn/dropout.cc b/src/operator/nn/dropout.cc index 0e4d18b1fda8..afad6fd5cc80 100644 --- a/src/operator/nn/dropout.cc +++ b/src/operator/nn/dropout.cc @@ -95,7 +95,7 @@ Example:: CHECK_EQ(in_shape->size(), 1U); const DropoutParam& param = nnvm::get(attrs.parsed); mxnet::TShape dshape(in_shape->at(0)); - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; out_shape->clear(); out_shape->push_back(dshape); for (int i = 0; i < param.axes.ndim(); ++i) { diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc index 98277c206dd6..a097357ef5a3 100644 --- a/src/operator/nn/fully_connected.cc +++ b/src/operator/nn/fully_connected.cc @@ -52,7 +52,7 @@ static bool FullyConnectedShape(const nnvm::NodeAttrs& attrs, mxnet::TShape dshape = (*in_shape)[fullc::kData]; mxnet::TShape oshape = (*out_shape)[0]; // require data to be known - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; index_t num_input; if (!param.flatten) { diff --git a/src/operator/nn/layer_norm.cc b/src/operator/nn/layer_norm.cc index 1b0e99d8fe87..2e47503a3318 100644 --- a/src/operator/nn/layer_norm.cc +++ b/src/operator/nn/layer_norm.cc @@ -48,7 +48,7 @@ static bool LayerNormShape(const nnvm::NodeAttrs& attrs, const int channelCount = dshape[axis]; - if (!shape_is_known(dshape)) { + if (!mxnet::ndim_is_known(dshape)) { return false; } diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc index 7c365f5081a1..3e081c9a0552 100644 --- a/src/operator/nn/pooling.cc +++ b/src/operator/nn/pooling.cc @@ -114,7 +114,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs, << "Pooling: Input data should be 3D in (batch, channel, x)" << " Or 4D in (batch, channel, y, x) " << " Or 5D in (batch, channel, d, y, x)"; - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; int layout = param.GetLayout(dshape.ndim()); if (param.global_pool) { mxnet::TShape oshape = dshape; diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h index c95f859a1b5b..59f572211d0e 100644 --- a/src/operator/operator_common.h +++ b/src/operator/operator_common.h @@ -160,14 +160,14 @@ inline std::string type_string(const int& x) { * \return whether x and y are compatible. */ inline bool shape_assign(mxnet::TShape *y, const mxnet::TShape& x) { - if (y->ndim() == -1) { + if (!mxnet::ndim_is_known(*y)) { *y = x; return true; } else if (y->ndim() != x.ndim()) { - return x.ndim() == -1; + return !mxnet::ndim_is_known(x); } else { for (int i = 0; i < y->ndim(); ++i) { - if ((*y)[i] == -1) { + if (!mxnet::dim_size_is_known(*y, i)) { (*y)[i] = x[i]; } else if ((*y)[i] != x[i] && x[i] >= 0) { return false; diff --git a/src/operator/pooling_v1-inl.h b/src/operator/pooling_v1-inl.h index 4e0ccc1caeb9..21ba270c9d42 100644 --- a/src/operator/pooling_v1-inl.h +++ b/src/operator/pooling_v1-inl.h @@ -247,7 +247,7 @@ class PoolingV1Prop : public OperatorProperty { CHECK_LE(dshape.ndim(), 5U) << "Pooling: Input data should be 4D in (batch, channel, y, x) " << "Or 5D in (batch, channel, d, y, x)"; mxnet::TShape oshape = dshape; - if (dshape.ndim() == 0) return false; + if (dshape.ndim() == -1) return false; if (param_.global_pool) { if (dshape.ndim() == 4) { oshape[2] = 1; diff --git a/src/operator/quantization/quantized_concat.cc b/src/operator/quantization/quantized_concat.cc index f97807424701..2cc2ec9d0374 100644 --- a/src/operator/quantization/quantized_concat.cc +++ b/src/operator/quantization/quantized_concat.cc @@ -55,7 +55,7 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_sha shape_assign(&dshape, tmp); } - if (dshape.ndim() == -1) return false; + if (!mxnet::ndim_is_known(dshape)) return false; for (int i = 0; i < param_.num_args; ++i) { CHECK(shape_assign(&(*in_shape)[i], dshape)) diff --git a/src/operator/random/unique_sample_op.h b/src/operator/random/unique_sample_op.h index c97d4fdf7ced..e88b95a8bdd6 100644 --- a/src/operator/random/unique_sample_op.h +++ b/src/operator/random/unique_sample_op.h @@ -60,7 +60,7 @@ inline bool SampleUniqueShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 0U); CHECK_EQ(out_attrs->size(), 2U); // output shape is known - if ((*out_attrs)[0].ndim() == 2 && param.shape.ndim() == -1) { + if ((*out_attrs)[0].ndim() == 2 && !mxnet::ndim_is_known(param.shape)) { SHAPE_ASSIGN_CHECK(*out_attrs, 1, mshadow::Shape1((*out_attrs)[0][0])); return true; } diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h index 9e612d06fe0d..f86eee79d9db 100644 --- a/src/operator/rnn-inl.h +++ b/src/operator/rnn-inl.h @@ -676,7 +676,7 @@ class RNNProp : public OperatorProperty { CHECK_EQ(in_shape->size(), 3U) << "Input:[data, parameters, state]"; } const mxnet::TShape &dshape = (*in_shape)[rnn_enum::kData]; - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; CHECK_EQ(dshape.ndim(), 3U) \ << "Input data should be rank-3 tensor of dim [sequence length, batch size, input size]"; // data: [sequence len, batch, input dimension] @@ -712,9 +712,7 @@ class RNNProp : public OperatorProperty { oshape[2] = numDirections * param_.state_size; } out_shape->push_back(oshape); - if (!param_.state_outputs) { - return true; - } else { + if (param_.state_outputs) { // outStateShape: [layer_num, batch, state size] mxnet::TShape outStateShape = dshape; outStateShape[0] = total_layers; @@ -733,8 +731,8 @@ class RNNProp : public OperatorProperty { cellStateShape[2] = param_.state_size; out_shape->push_back(cellStateShape); } - return true; } + return shape_is_known(dshape); } bool InferType(std::vector *in_type, diff --git a/src/operator/slice_channel-inl.h b/src/operator/slice_channel-inl.h index a51b17cd324e..e37ffdcf1b91 100644 --- a/src/operator/slice_channel-inl.h +++ b/src/operator/slice_channel-inl.h @@ -195,7 +195,7 @@ class SliceChannelProp : public OperatorProperty { CHECK_EQ(in_shape->size(), 1U); mxnet::TShape dshape = in_shape->at(slice_enum::kData); mxnet::TShape ishape = in_shape->at(slice_enum::kData); - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; if (param_.axis >= 0) { CHECK_LT(param_.axis, dshape.ndim()); } else { @@ -212,15 +212,18 @@ class SliceChannelProp : public OperatorProperty { << " evenly sized chunks, but this is not possible because " << param_.num_outputs << " does not evenly divide " << dshape[real_axis]; - if (param_.squeeze_axis && ishape[real_axis] != 0) { - CHECK_EQ(ishape[real_axis], static_cast(param_.num_outputs)) + if (param_.squeeze_axis && ishape[real_axis] != -1) { + CHECK_EQ(ishape[real_axis], param_.num_outputs) << "If squeeze axis is True, the size of the sliced axis must be the same as num_outputs." << " Input shape=" << ishape << ", axis=" << real_axis << ", num_outputs=" << param_.num_outputs << "."; } - dshape[real_axis] /= param_.num_outputs; - if (param_.squeeze_axis && (dshape[real_axis] == 1 || ishape[real_axis] == 0)) { - for (int d = real_axis; d < static_cast(dshape.ndim()) - 1; ++d) { + if (dshape[real_axis] >= 0) { + dshape[real_axis] /= param_.num_outputs; + } + if (param_.squeeze_axis && (dshape[real_axis] == 1 + || !mxnet::dim_size_is_known(ishape, real_axis))) { + for (int d = real_axis; d < dshape.ndim() - 1; ++d) { dshape[d] = dshape[d+1]; } dshape = mxnet::TShape(&dshape[0], &dshape[dshape.ndim()-1]); diff --git a/src/operator/softmax_output-inl.h b/src/operator/softmax_output-inl.h index f81a232d629a..80ab40ef6c50 100644 --- a/src/operator/softmax_output-inl.h +++ b/src/operator/softmax_output-inl.h @@ -349,7 +349,7 @@ class SoftmaxOutputProp : public OperatorProperty { lshape2[i-1] = dshape[i]; mxnet::TShape lshape3 = dshape; lshape3[1] = 1; - if (in_shape->at(softmaxout_enum::kLabel).ndim() == -1) { + if (!mxnet::ndim_is_known(in_shape->at(softmaxout_enum::kLabel))) { in_shape->at(softmaxout_enum::kLabel) = lshape1; } else if (in_shape->at(softmaxout_enum::kLabel) == lshape1) { } else if (in_shape->at(softmaxout_enum::kLabel) == lshape2) { diff --git a/src/operator/softmax_output.cc b/src/operator/softmax_output.cc index 262242f98004..548225f0496b 100644 --- a/src/operator/softmax_output.cc +++ b/src/operator/softmax_output.cc @@ -85,7 +85,7 @@ static bool SoftmaxOutputShape(const nnvm::NodeAttrs& attrs, const SoftmaxOutputParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_shape->size(), 2U) << "Input:[data, label]"; const mxnet::TShape &dshape = in_shape->at(0); - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; // label.shape == data.shape: use probability as label if (dshape != (*in_shape)[softmaxout_enum::kLabel]) { @@ -97,7 +97,7 @@ static bool SoftmaxOutputShape(const nnvm::NodeAttrs& attrs, lshape2[i-1] = dshape[i]; mxnet::TShape lshape3 = dshape; lshape3[1] = 1; - if (in_shape->at(softmaxout_enum::kLabel).ndim() == -1) { + if (!mxnet::ndim_is_known(in_shape->at(softmaxout_enum::kLabel))) { in_shape->at(softmaxout_enum::kLabel) = lshape1; } else if (in_shape->at(softmaxout_enum::kLabel) == lshape1) { } else if (in_shape->at(softmaxout_enum::kLabel) == lshape2) { diff --git a/src/operator/svm_output-inl.h b/src/operator/svm_output-inl.h index 3d651c13d8ba..dfe9fa606e95 100644 --- a/src/operator/svm_output-inl.h +++ b/src/operator/svm_output-inl.h @@ -143,7 +143,7 @@ class SVMOutputProp : public OperatorProperty { using namespace mshadow; CHECK_EQ(in_shape->size(), 2U) << "Input:[data, label]"; const mxnet::TShape &dshape = in_shape->at(0); - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; mxnet::TShape label_shape(dshape.ndim() - 1, -1); for (int i = 0; i + 1 < dshape.ndim(); ++i) label_shape[i] = dshape[i]; diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h index fb55fcd4d6a2..2d432c9563d6 100644 --- a/src/operator/tensor/broadcast_reduce_op.h +++ b/src/operator/tensor/broadcast_reduce_op.h @@ -339,13 +339,13 @@ inline bool BroadcastToShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); mxnet::TShape& ishape = (*in_attrs)[0]; - if (!shape_is_known(ishape)) return false; + if (!mxnet::ndim_is_known(ishape)) return false; const BroadcastToParam& param = nnvm::get(attrs.parsed); CHECK_EQ(ishape.ndim(), param.shape.ndim()) << "Operand of shape " << ishape << " cannot be broadcasted to " << param.shape; mxnet::TShape oshape = param.shape; for (int i = 0; i < ishape.ndim(); ++i) { - if (oshape[i] != 0) { + if (oshape[i] != -1) { CHECK(ishape[i] == oshape[i] || ishape[i] == 1) << "Array cannot be broadcasted from " << ishape << " to " << param.shape; } else { @@ -364,7 +364,7 @@ inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs, mxnet::TShape& lhs_shape = (*in_attrs)[0]; mxnet::TShape& rhs_shape = (*in_attrs)[1]; - if (!shape_is_known(lhs_shape) || !shape_is_known(lhs_shape)) { + if (!mxnet::ndim_is_known(lhs_shape) || !mxnet::ndim_is_known(rhs_shape)) { return false; } @@ -378,7 +378,7 @@ inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs, oshape = mxnet::TShape(rhs_shape); for (int i = 0; i < lhs_shape.ndim(); ++i) { - if (rhs_shape[i] != 0) { + if (rhs_shape[i] != -1) { CHECK(lhs_shape[i] == rhs_shape[i] || lhs_shape[i] == 1) << "Array cannot be broadcasted from " << lhs_shape << " to " << rhs_shape; } else { diff --git a/src/operator/tensor/diag_op-inl.h b/src/operator/tensor/diag_op-inl.h index b90b09a36bd3..c95c1ce414f2 100644 --- a/src/operator/tensor/diag_op-inl.h +++ b/src/operator/tensor/diag_op-inl.h @@ -84,7 +84,7 @@ inline mxnet::TShape DiagShapeImpl(const mxnet::TShape& ishape, const int k, auto s = std::min(h, w); if (s < 0) { - s = 0; + s = -1; } if (x1 > x2) { @@ -114,7 +114,7 @@ inline bool DiagOpShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(out_attrs->size(), 1U); const mxnet::TShape& ishape = (*in_attrs)[0]; - if (!shape_is_known(ishape)) { + if (!mxnet::ndim_is_known(ishape)) { return false; } diff --git a/src/operator/tensor/elemwise_binary_broadcast_op.h b/src/operator/tensor/elemwise_binary_broadcast_op.h index dfb3231a75a9..64a4d7cc15ff 100644 --- a/src/operator/tensor/elemwise_binary_broadcast_op.h +++ b/src/operator/tensor/elemwise_binary_broadcast_op.h @@ -48,7 +48,7 @@ inline bool BinaryBroadcastShape(const nnvm::NodeAttrs& attrs, mxnet::TShape& rhs = (*in_attrs)[1]; // avoid pre-mature shape inference. - if (lhs.ndim() == -1 || rhs.ndim() == -1) return false; + if (!mxnet::ndim_is_known(lhs) || !mxnet::ndim_is_known(rhs)) return false; if (lhs == rhs) { SHAPE_ASSIGN_CHECK(*out_attrs, 0, lhs); diff --git a/src/operator/tensor/init_op.h b/src/operator/tensor/init_op.h index 3c4d34b3f4a5..bcad602c95c0 100644 --- a/src/operator/tensor/init_op.h +++ b/src/operator/tensor/init_op.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -213,14 +214,13 @@ inline bool InitShape(const nnvm::NodeAttrs& attrs, const ParamType& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 0U); CHECK_EQ(out_attrs->size(), 1U); - if (shape_is_known((*out_attrs)[0]) && !shape_is_known(param.shape)) return true; - for (int i=0 ; i < param.shape.ndim() ; ++i) { - if (param.shape[i] < 0U) { - LOG(FATAL) << "Shape cannot contain negative values " << param.shape; - } + mxnet::TShape param_shape = param.shape; + if (!Imperative::Get()->is_np_comp()) { + common::ConvertToNumpyShape(¶m_shape); } - SHAPE_ASSIGN_CHECK(*out_attrs, 0, param.shape); - return true; + if (shape_is_known((*out_attrs)[0]) && !shape_is_known(param_shape)) return true; + SHAPE_ASSIGN_CHECK(*out_attrs, 0, param_shape); + return shape_is_known(out_attrs->at(0)); } template diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index e7b90e52d968..ff6e3545ab07 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -110,7 +110,7 @@ inline mxnet::TShape InferReshapeShape(const mxnet::Tuple& shape, CHECK_LT(src_idx, dshape_len-1); const int d1 = dshape_vec[src_idx++]; const int d2 = dshape_vec[src_idx++]; - if (d1 == -1 || d2 == -1) { + if (!mxnet::dim_size_is_known(d1) || !mxnet::dim_size_is_known(d2)) { tmp.push_back(-1); } else { tmp.push_back(d1 * d2); @@ -164,7 +164,7 @@ inline bool ReverseReshapeInferShape(mxnet::TShape *in, const mxnet::TShape& out int zero_axis = -1; int known_dim_size_prod = 1; for (int i = 0; i < in->ndim(); i++) { - if ((*in)[i] == -1) { + if (!mxnet::dim_size_is_known(*in, i)) { if (zero_axis != -1) return false; // more than 1 zero found. else @@ -185,7 +185,7 @@ inline bool ReshapeShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; CHECK_EQ(out_attrs->size(), 1U); mxnet::TShape &dshape = (*in_attrs)[0]; - if (dshape.ndim() == -1) return false; + if (!mxnet::ndim_is_known(dshape)) return false; mxnet::TShape oshape; if (param_.shape.ndim() != 0) { oshape = InferReshapeShape(param_.shape, dshape, param_.reverse); @@ -314,7 +314,7 @@ void Transpose(const nnvm::NodeAttrs& attrs, const std::vector& outputs) { const TransposeParam& param = nnvm::get(attrs.parsed); CHECK_EQ(req[0], kWriteTo) << "Transpose does not support inplace"; - if (param.axes.ndim() == -1) { + if (!mxnet::ndim_is_known(param.axes)) { mxnet::TShape axes(inputs[0].ndim(), -1); for (int i = 0; i < axes.ndim(); ++i) { axes[i] = axes.ndim() - 1 - i; @@ -334,7 +334,7 @@ inline bool TransposeShape(const nnvm::NodeAttrs& attrs, mxnet::TShape& shp = (*in_attrs)[0]; CHECK_LE(shp.ndim(), 6U) << "Transpose support at most 6 dimensions"; mxnet::TShape ret(shp.ndim(), -1); - if (param.axes.ndim() == -1) { + if (!mxnet::ndim_is_known(param.axes)) { for (int i = 0; i < shp.ndim(); ++i) { ret[i] = shp[shp.ndim()-1-i]; } @@ -367,7 +367,7 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs, const ExpandDimParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - if (!shape_is_known(in_attrs->at(0)) && !shape_is_known(out_attrs->at(0))) { + if (!mxnet::ndim_is_known(in_attrs->at(0)) && !mxnet::ndim_is_known(out_attrs->at(0))) { return false; } @@ -401,7 +401,7 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs, for (int i = 0; i < axis; ++i) ret[i] = oshape[i]; for (int i = axis+1; i < indim+1; ++i) ret[i-1] = oshape[i]; SHAPE_ASSIGN_CHECK(*in_attrs, 0, ret); - return shape_is_known(ret); + return shape_is_known(in_attrs->at(0)) && shape_is_known(out_attrs->at(0)); } // Currently MKLDNN only supports step = 1 or step has no value @@ -724,7 +724,7 @@ inline bool SliceOpShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); const mxnet::TShape& dshape = (*in_attrs)[0]; - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; const SliceParam& param = nnvm::get(attrs.parsed); mxnet::TShape oshape = dshape; @@ -1115,9 +1115,9 @@ inline void GetSliceAxisParams(const SliceAxisParam& param, const mxnet::TShape& int* axis, index_t* begin, index_t* end) { *axis = param.axis; if (*axis < 0) { - *axis += static_cast(ishape.ndim()); + *axis += ishape.ndim(); } - CHECK(*axis < static_cast(ishape.ndim()) && *axis >= 0) << + CHECK(*axis < ishape.ndim() && *axis >= 0) << "Transformed axis must be smaller than the source ndim and larger than zero! Recieved axis=" << param.axis << ", src_ndim=" << ishape.ndim() << ", transformed axis=" << *axis; index_t axis_size = static_cast(ishape[*axis]); @@ -1126,7 +1126,7 @@ inline void GetSliceAxisParams(const SliceAxisParam& param, const mxnet::TShape& if (*begin < 0) { *begin += axis_size; } - if (axis_size) { + if (axis_size > 0) { if (!static_cast(param.end)) { *end = axis_size; } else { @@ -2591,7 +2591,7 @@ inline bool SplitOpShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U); mxnet::TShape dshape = in_attrs->at(split_enum::kData); mxnet::TShape ishape = in_attrs->at(split_enum::kData); - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; if (param.axis >= 0) { CHECK_LT(static_cast(param.axis), dshape.ndim()); } else { diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 2bb9596d5572..985405424dda 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -2265,7 +2265,7 @@ def test_reshape_new(src_shape, shape_args, reverse, dst_shape): for i in range(len(src_shape)): holdout_src_shape = list(src_shape) - holdout_src_shape[i] = -1 + holdout_src_shape[i] = 0 holdout_src_shape = tuple(holdout_src_shape) net = mx.sym.Variable('data') net = mx.sym.elemwise_add(net.reshape(shape_args, reverse=reverse), mx.sym.ones(shape=dst_shape)) @@ -4398,7 +4398,8 @@ def test_invalid_reps(): assert_exception(mx.nd.tile, MXNetError, data, (1, 0, 3)) test_normal_case() - test_empty_tensor() + with mx.numpy.enable_np_comp(): + test_empty_tensor() test_empty_reps() test_tile_backward() test_tile_numeric_gradient() @@ -4458,7 +4459,8 @@ def test_zero_depth(): test_normal_case(index_type=np.float64) test_normal_case(index_type=np.float32) test_normal_case(index_type=np.float16) - test_empty_indices() + with mx.numpy.enable_np_comp(): + test_empty_indices() test_zero_depth()