diff --git a/benchmark/opperf/utils/profiler_utils.py b/benchmark/opperf/utils/profiler_utils.py index 1cb29a8fdec8..087746ab728d 100644 --- a/benchmark/opperf/utils/profiler_utils.py +++ b/benchmark/opperf/utils/profiler_utils.py @@ -117,7 +117,7 @@ def parse_profiler_dump(operator_name, profiler_dump): MXNDArrayFree 49 1.1220 0.0170 0.0360 0.0229 MXAutogradBackwardEx 50 11.5460 0.1980 0.3360 0.2309 MXNet C API Calls 399 1.9990 1.6010 1.9990 0.1990 - MXImperativeInvokeEx 50 4.4810 0.0700 0.1330 0.0896 + MXImperativeInvoke 50 4.4810 0.0700 0.1330 0.0896 MXNDArrayWaitAll 50 769.0570 14.0200 24.5030 15.3811 MXAutogradSetIsTraining 100 0.0190 0.0000 0.0010 0.0002 MXAutogradSetIsRecording 100 0.0400 0.0000 0.0010 0.0004 diff --git a/docs/static_site/src/pages/api/cpp/docs/tutorials/multi_threaded_inference.md b/docs/static_site/src/pages/api/cpp/docs/tutorials/multi_threaded_inference.md index f490aa12e6fc..08cfea115c7d 100644 --- a/docs/static_site/src/pages/api/cpp/docs/tutorials/multi_threaded_inference.md +++ b/docs/static_site/src/pages/api/cpp/docs/tutorials/multi_threaded_inference.md @@ -50,12 +50,12 @@ for MXNet users to do multi-threaded inference. * \brief create cached operator, allows to choose thread_safe version * of cachedop */ -MXNET_DLL int MXCreateCachedOpEX(SymbolHandle handle, - int num_flags, - const char** keys, - const char** vals, - CachedOpHandle *out, - bool thread_safe DEFAULT(false)); +MXNET_DLL int MXCreateCachedOp(SymbolHandle handle, + int num_flags, + const char** keys, + const char** vals, + CachedOpHandle *out, + bool thread_safe DEFAULT(false)); ``` ## Multithreaded inference in MXNet with C API and CPP Package @@ -135,8 +135,8 @@ The above code loads params and copies input data and params to specific context [https://github.com/apache/incubator-mxnet/example/multi_threaded_inference/multi_threaded_inference.cc#L207-L233](multi_threaded_inference.cc#L207-233) The above code prepares `flag_key_cstrs` and `flag_val_cstrs` to be passed the Cached op. -The C API call is made with `MXCreateCachedOpEX`. This will lead to creation of thread safe cached -op since the `thread_safe` (which is the last parameter to `MXCreateCachedOpEX`) is set to +The C API call is made with `MXCreateCachedOp`. This will lead to creation of thread safe cached +op since the `thread_safe` (which is the last parameter to `MXCreateCachedOp`) is set to true. When this is set to false, it will invoke CachedOp instead of CachedOpThreadSafe. @@ -146,7 +146,7 @@ true. When this is set to false, it will invoke CachedOp instead of CachedOpThre The above creates the lambda function taking the thread number as the argument. If `random_sleep` is set it will sleep for a random number (secs) generated between 0 to 5 seconds. -Following this, it invokes `MXInvokeCachedOpEx`(from the hdl it determines whether to invoke cached op threadsafe version or not). +Following this, it invokes `MXInvokeCachedOp`(from the hdl it determines whether to invoke cached op threadsafe version or not). When this is set to false, it will invoke CachedOp instead of CachedOpThreadSafe. ### Step 5: Spawn multiple threads and wait for all threads to complete @@ -179,7 +179,7 @@ The above code outputs results for different threads and cleans up the thread sa 6. Bulking of ops is not supported. 7. This only supports inference use cases currently, training use cases are not supported. 8. Graph rewrites with subgraph API currently not supported. -9. There is currently no frontend API support to run multi threaded inference. Users can use CreateCachedOpEX and InvokeCachedOp in combination with +9. There is currently no frontend API support to run multi threaded inference. Users can use CreateCachedOp and InvokeCachedOp in combination with the CPP frontend to run multi-threaded inference as of today. 10. Multi threaded inference with threaded engine with Module/Symbolic API and C Predict API are not currently supported. 11. Exception thrown with `wait_to_read` in individual threads can cause issues. Calling invoke from each thread and calling WaitAll after thread joins should still work fine. diff --git a/docs/static_site/src/pages/api/developer_guide/profiling.md b/docs/static_site/src/pages/api/developer_guide/profiling.md index 841c00891b6b..8fad066afce8 100644 --- a/docs/static_site/src/pages/api/developer_guide/profiling.md +++ b/docs/static_site/src/pages/api/developer_guide/profiling.md @@ -130,11 +130,11 @@ MXNET_C_API ================= Name Total Count Time (ms) Min Time (ms) Max Time (ms) Avg Time (ms) ---- ----------- --------- ------------- ------------- ------------- -MXImperativeInvokeEx 2 0.3360 0.0990 0.2370 0.1680 +MXImperativeInvoke 2 0.3360 0.0990 0.2370 0.1680 MXNet C API Calls 17 0.2320 0.2160 0.2320 0.0080 MXNDArraySyncCopyFromCPU 1 0.1750 0.1750 0.1750 0.1750 -MXNDArrayCreateEx 1 0.1050 0.1050 0.1050 0.1050 -MXNDArrayGetShapeEx 11 0.0210 0.0000 0.0160 0.0019 +MXNDArrayCreate 1 0.1050 0.1050 0.1050 0.1050 +MXNDArrayGetShape 11 0.0210 0.0000 0.0160 0.0019 MXNDArrayWaitAll 1 0.0200 0.0200 0.0200 0.0200 MXNDArrayGetDType 1 0.0010 0.0010 0.0010 0.0010 MXNet C API Concurrency 34 0.0000 0.0000 0.0010 0.0000 @@ -157,8 +157,8 @@ The profiling data has captured info about interesting functions that have execu |**Function Name** |**Description** | |--- |--- | -|**MXImperativeInvokeEx** | invokes an operator to perform the computation | -|**MXNDArrayCreateEx** | creates an ndarray | +|**MXImperativeInvoke** | invokes an operator to perform the computation | +|**MXNDArrayCreate** | creates an ndarray | | **MXNDArrayGetDType** | returns the data type of the ndarray | | **MXNDArrayGetShape** | returns the shape of the ndarray (as a tuple where each element is the size of a dimension) | | **MXNDArraySyncCopyFromCPU** | called when data is initially residing outside of an MXNet data structure (ie. numpy.ndarry rather than mxnet.numpy.ndarray). Data is copied into the MXNet data structure | @@ -201,7 +201,7 @@ In the following list, #1 uses regular numpy functions to initialize data. MXNet ![dev_guide_profilling_3.png](/assets/img/dev_guide_profilling_3.png) Here, the four red arrows show the important events in this sequence. -1. First, the `MXNDArrayCreateEx` is called to physically allocate space to store the data and other necessary attributes in the `ndarray` class. +1. First, the `MXNDArrayCreate` is called to physically allocate space to store the data and other necessary attributes in the `ndarray` class. 2. Then some support functions are called (`MXNDArrayGetShape,` `MXNDArrayGetDType`) while initialing the data structure. 3. Finally the data is copied from the non-MXNet ndarray into the newly prepared MXNet ndarray by the `MXNDArraySyncCopyFromCPU` function. @@ -210,9 +210,9 @@ Next, #3 (in our code example) begins the computing process to produce our outpu ![dev_guide_profilling_4.png](/assets/img/dev_guide_profilling_4.png) Here you can see that the following sequence of events happen: -1. `MXImperativeInvokeEx` is called the first time to launch the diagonal operator from #3 (in our code example). +1. `MXImperativeInvoke` is called the first time to launch the diagonal operator from #3 (in our code example). 2. Soon after that the actual **`diag`** operator begins executing in another thread. -3. While that is happening, our main thread moves on and calls `MXImperativeInvokeEx` again to launch the **`sum`** operator. Just like before, this returns without actually executing the operator and continues. +3. While that is happening, our main thread moves on and calls `MXImperativeInvoke` again to launch the **`sum`** operator. Just like before, this returns without actually executing the operator and continues. 4. Lastly, the `MXNDArrayWaitAll` is called as the main thread has progressed to #4 in our app. It will wait here while all the computation finishes. Next lets look at a view of the part of the timeline zoomed to the actual operator execution. @@ -274,6 +274,6 @@ The first red box is the first run, and the 2nd smaller one is the 2nd run. Firs ![dev_guide_profilling_7.png](/assets/img/dev_guide_profilling_7.png) -We still have the same sequence of events at the beginning to initialize the MXNet ndarray (`MXNDArrayCreateEx`, `MXNDArrayGetShape`, `MXNDArrayGetDType`, `MXNDArraySyncCopyFromCPU`). Then the **`diag`** operator runs, followed by the **`sum`** operator, and finally the `waitall`. When you look at this, be careful about the assumptions that you make. In this version of the timeline, it appears that the operator executes after the `MXImperativeInvokeEx` runs, and seems to imply an inherent ordering. But realize that there is no dependency between the **`diag`** operator finishing and the next **`MXImperativeInvokeEx`** launching the **`sum`** operator. In this case, it just-so-happens that the **`diag`** operator finishes so quickly that it appears that way. But in reality the main thread is launching the operators and not waiting for them to finish. Lastly, keep in mind that in this case by the time we hit the **`MXNDArrayWaitAll`** everything is already done and we return immediately, but in other circumstances it may sit here waiting for everything to finish (like we saw earlier in the first run). +We still have the same sequence of events at the beginning to initialize the MXNet ndarray (`MXNDArrayCreate`, `MXNDArrayGetShape`, `MXNDArrayGetDType`, `MXNDArraySyncCopyFromCPU`). Then the **`diag`** operator runs, followed by the **`sum`** operator, and finally the `waitall`. When you look at this, be careful about the assumptions that you make. In this version of the timeline, it appears that the operator executes after the `MXImperativeInvoke` runs, and seems to imply an inherent ordering. But realize that there is no dependency between the **`diag`** operator finishing and the next **`MXImperativeInvoke`** launching the **`sum`** operator. In this case, it just-so-happens that the **`diag`** operator finishes so quickly that it appears that way. But in reality the main thread is launching the operators and not waiting for them to finish. Lastly, keep in mind that in this case by the time we hit the **`MXNDArrayWaitAll`** everything is already done and we return immediately, but in other circumstances it may sit here waiting for everything to finish (like we saw earlier in the first run). diff --git a/example/multi_threaded_inference/multi_threaded_inference.cc b/example/multi_threaded_inference/multi_threaded_inference.cc index f1d0d72ef774..b0b6869027d7 100644 --- a/example/multi_threaded_inference/multi_threaded_inference.cc +++ b/example/multi_threaded_inference/multi_threaded_inference.cc @@ -226,9 +226,9 @@ void run_inference(const std::string& model_name, const std::vector(sym.handle.value), len(flags), CBeginPtr(c_flag_keys), CBeginPtr(c_flag_vals), - &self.chandle)) + &self.chandle, + False)) def __del__(self): CALL(MXFreeCachedOp(self.chandle)) @@ -174,7 +175,7 @@ cdef class CachedOp: else: p_output_vars = &output_vars[0] - CALL(MXInvokeCachedOpEx( + CALL(MXInvokeCachedOp( self.chandle, len(args), &ndvars[0] if ndvars.size() != 0 else NULL, @@ -239,7 +240,7 @@ def _imperative_invoke(handle, ndargs, keys, vals, out, is_np_op=0, output_is_li cdef vector[const char*] param_keys = SVec2Ptr(ckeys) cdef vector[const char*] param_vals = SVec2Ptr(cvals) - CALL(MXImperativeInvokeEx( + CALL(MXImperativeInvoke( chandle, ndvars.size(), &ndvars[0] if ndvars.size() != 0 else NULL, diff --git a/python/mxnet/dlpack.py b/python/mxnet/dlpack.py index b5e8ee83304e..9ef005f1bb2a 100644 --- a/python/mxnet/dlpack.py +++ b/python/mxnet/dlpack.py @@ -99,7 +99,7 @@ def from_dlpack(dlpack): assert ctypes.pythonapi.PyCapsule_IsValid(dlpack, _c_str_dltensor), ValueError( 'Invalid DLPack Tensor. DLTensor capsules can be consumed only once.') dlpack_handle = ctypes.c_void_p(ctypes.pythonapi.PyCapsule_GetPointer(dlpack, _c_str_dltensor)) - check_call(_LIB.MXNDArrayFromDLPackEx(dlpack_handle, False, ctypes.byref(handle))) + check_call(_LIB.MXNDArrayFromDLPack(dlpack_handle, False, ctypes.byref(handle))) # Rename PyCapsule (DLPack) ctypes.pythonapi.PyCapsule_SetName(dlpack, _c_str_used_dltensor) # delete the deleter of the old dlpack @@ -180,6 +180,6 @@ def _make_dl_managed_tensor(array): ndarray.flags['WRITEABLE'] = False c_obj = _make_dl_managed_tensor(ndarray) handle = NDArrayHandle() - check_call(_LIB.MXNDArrayFromDLPackEx(ctypes.byref(c_obj), True, ctypes.byref(handle))) + check_call(_LIB.MXNDArrayFromDLPack(ctypes.byref(c_obj), True, ctypes.byref(handle))) return array_cls(handle=handle) return from_numpy diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py index fa26dfff9628..0f638a1ed562 100644 --- a/python/mxnet/ndarray/ndarray.py +++ b/python/mxnet/ndarray/ndarray.py @@ -183,7 +183,7 @@ def _new_alloc_handle(shape, ctx, delay_alloc, dtype=mx_real_t): dtype_type = np.dtype(dtype) else: dtype_type = np.dtype(dtype).type - check_call(_LIB.MXNDArrayCreateEx64( + check_call(_LIB.MXNDArrayCreate64( c_array_buf(mx_int64, native_array('q', shape)), ctypes.c_int(len(shape)), ctypes.c_int(ctx.device_typeid), @@ -205,7 +205,7 @@ def _new_alloc_handle(shape, ctx, delay_alloc, dtype=mx_real_t): dtype_type = np.dtype(dtype) else: dtype_type = np.dtype(dtype).type - check_call(_LIB.MXNDArrayCreateEx( + check_call(_LIB.MXNDArrayCreate( c_array_buf(mx_uint, native_array('I', shape)), mx_uint(len(shape)), ctypes.c_int(ctx.device_typeid), @@ -218,7 +218,7 @@ def _new_alloc_handle(shape, ctx, delay_alloc, dtype=mx_real_t): def _new_from_shared_mem(shared_pid, shared_id, shape, dtype): hdl = NDArrayHandle() - check_call(_LIB.MXNDArrayCreateFromSharedMemEx( + check_call(_LIB.MXNDArrayCreateFromSharedMem( ctypes.c_int(shared_pid), ctypes.c_int(shared_id), c_array(mx_int, shape), @@ -2426,11 +2426,11 @@ def shape(self): ndim = mx_int() if _int64_enabled(): pdata = ctypes.POINTER(mx_int64)() - check_call(_LIB.MXNDArrayGetShapeEx64( + check_call(_LIB.MXNDArrayGetShape64( self.handle, ctypes.byref(ndim), ctypes.byref(pdata))) else: pdata = ctypes.POINTER(mx_int)() - check_call(_LIB.MXNDArrayGetShapeEx( + check_call(_LIB.MXNDArrayGetShape( self.handle, ctypes.byref(ndim), ctypes.byref(pdata))) if ndim.value == -1: return None diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py index b61686738391..87cd5cac2096 100644 --- a/python/mxnet/numpy/multiarray.py +++ b/python/mxnet/numpy/multiarray.py @@ -119,7 +119,7 @@ def _new_alloc_handle(shape, ctx, delay_alloc, dtype=mx_real_t): # pylint: disa """ hdl = NDArrayHandle() if _int64_enabled(): - check_call(_LIB.MXNDArrayCreateEx64( + check_call(_LIB.MXNDArrayCreate64( c_array_buf(mx_int64, native_array('q', shape)), ctypes.c_int(len(shape)), ctypes.c_int(ctx.device_typeid), @@ -141,7 +141,7 @@ def _new_alloc_handle(shape, ctx, delay_alloc, dtype=mx_real_t): # pylint: disa dtype_type = _np.dtype(dtype) else: dtype_type = _np.dtype(dtype).type - check_call(_LIB.MXNDArrayCreateEx( + check_call(_LIB.MXNDArrayCreate( c_array_buf(mx_uint, native_array('I', shape)), mx_uint(len(shape)), ctypes.c_int(ctx.device_typeid), @@ -2331,11 +2331,11 @@ def shape(self): num_dim = mx_int() if _int64_enabled(): pdata = ctypes.POINTER(mx_int64)() - check_call(_LIB.MXNDArrayGetShapeEx64( + check_call(_LIB.MXNDArrayGetShape64( self.handle, ctypes.byref(num_dim), ctypes.byref(pdata))) else: pdata = ctypes.POINTER(mx_int)() - check_call(_LIB.MXNDArrayGetShapeEx( + check_call(_LIB.MXNDArrayGetShape( self.handle, ctypes.byref(num_dim), ctypes.byref(pdata))) if num_dim.value == -1: return None diff --git a/python/mxnet/profiler.py b/python/mxnet/profiler.py index d43f7383daa3..1b9583e1ecbb 100644 --- a/python/mxnet/profiler.py +++ b/python/mxnet/profiler.py @@ -185,11 +185,11 @@ def dumps(reset=False, format='table', sort_by='total', ascending=False): "Invalid value provided for ascending: {0}. Support: False, True".format(ascending) assert reset in reset_to_int.keys(),\ "Invalid value provided for reset: {0}. Support: False, True".format(reset) - check_call(_LIB.MXAggregateProfileStatsPrintEx(ctypes.byref(debug_str), - reset_to_int[reset], - format_to_int[format], - sort_by_to_int[sort_by], - asc_to_int[ascending])) + check_call(_LIB.MXAggregateProfileStatsPrint(ctypes.byref(debug_str), + reset_to_int[reset], + format_to_int[format], + sort_by_to_int[sort_by], + asc_to_int[ascending])) return py_str(debug_str.value) diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py index 039ac0d9d195..b957675088e0 100644 --- a/python/mxnet/symbol/symbol.py +++ b/python/mxnet/symbol/symbol.py @@ -1242,9 +1242,9 @@ def _infer_shape_impl(self, partial, *args, **kwargs): out_shape_data = ctypes.POINTER(ctypes.POINTER(mx_int64))() aux_shape_data = ctypes.POINTER(ctypes.POINTER(mx_int64))() if partial: - infer_func = _LIB.MXSymbolInferShapePartialEx64 + infer_func = _LIB.MXSymbolInferShapePartial64 else: - infer_func = _LIB.MXSymbolInferShapeEx64 + infer_func = _LIB.MXSymbolInferShape64 check_call(infer_func( self.handle, mx_uint(len(indptr) - 1), @@ -1271,9 +1271,9 @@ def _infer_shape_impl(self, partial, *args, **kwargs): out_shape_data = ctypes.POINTER(ctypes.POINTER(mx_int))() aux_shape_data = ctypes.POINTER(ctypes.POINTER(mx_int))() if partial: - infer_func = _LIB.MXSymbolInferShapePartialEx + infer_func = _LIB.MXSymbolInferShapePartial else: - infer_func = _LIB.MXSymbolInferShapeEx + infer_func = _LIB.MXSymbolInferShape check_call(infer_func( self.handle, mx_uint(len(indptr) - 1), diff --git a/python/mxnet/torch.py b/python/mxnet/torch.py deleted file mode 100644 index 295c019166cf..000000000000 --- a/python/mxnet/torch.py +++ /dev/null @@ -1,182 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# coding: utf-8 -"""Interface for NDArray functions executed by torch backend. -Install Torch and compile with USE_TORCH=1 to use this module.""" - -import ctypes -import sys -from .base import _LIB -from .base import c_array, c_str_array, c_handle_array, py_str, build_param_doc as _build_param_doc -from .base import mx_uint, mx_float, FunctionHandle -from .base import check_call -from .ndarray import NDArray, _new_empty_handle - -try: - _LUAJIT = ctypes.CDLL("libluajit.so", mode=ctypes.RTLD_GLOBAL) -except OSError: - _LUAJIT = None - -# pylint: disable=too-many-locals, invalid-name -def _make_torch_function(handle): - """Create a Torch function from the FunctionHandle.""" - # Get the property of function - n_used_vars = mx_uint() - n_scalars = mx_uint() - n_mutate_vars = mx_uint() - type_mask = ctypes.c_int() - check_call(_LIB.MXFuncDescribe( - handle, - ctypes.byref(n_used_vars), - ctypes.byref(n_scalars), - ctypes.byref(n_mutate_vars), - ctypes.byref(type_mask))) - n_mutate_vars = n_mutate_vars.value - n_used_vars = n_used_vars.value - n_scalars = n_scalars.value - type_mask = type_mask.value - - # Get the information from the function - name = ctypes.c_char_p() - desc = ctypes.c_char_p() - num_args = mx_uint() - arg_names = ctypes.POINTER(ctypes.c_char_p)() - arg_types = ctypes.POINTER(ctypes.c_char_p)() - arg_descs = ctypes.POINTER(ctypes.c_char_p)() - ret_type = ctypes.c_char_p() - - check_call(_LIB.MXFuncGetInfo( - handle, ctypes.byref(name), ctypes.byref(desc), - ctypes.byref(num_args), - ctypes.byref(arg_names), - ctypes.byref(arg_types), - ctypes.byref(arg_descs), - ctypes.byref(ret_type))) - func_name = py_str(name.value) - if not func_name.startswith('_th_'): - return None - narg = int(num_args.value) - param_str = _build_param_doc( - [py_str(arg_names[i]) for i in range(narg)], - [py_str(arg_types[i]) for i in range(narg)], - [py_str(arg_descs[i]) for i in range(narg)]) - - if n_mutate_vars > 1: - res = ','.join(['res%d '%i for i in range(n_mutate_vars)]) - else: - res = 'res ' - doc_str = (('Interface for Torch function {name}.\n' + - 'Invoke with\n{res}= mxnet.th.{name}(Parameters)\nor\n'+ - 'mxnet.th.{name}({res}, Parameters).\n\n' + - '{param_str}\n' + - 'References: ' + - 'https://github.com/torch/torch7/blob/master/doc/maths.md\n').format( - name=func_name[4:], param_str=param_str, - res=res)) - - def generic_torch_function(*args, **kwargs): - """Invoke this function by passing in parameters. - - Parameters - ---------- - *args - Positional arguments of inputs (both scalar and `NDArray`). - - Returns - ------- - out : NDArray - The result NDArray(tuple) of result of computation. - """ - ndargs = [] - arg_format = '' - value = '' - for arg in args: - if isinstance(arg, NDArray): - ndargs.append(arg) - arg_format += 'n' - value += ',' - elif isinstance(arg, int): - arg_format += 'i' - value += str(arg) + ',' - elif isinstance(arg, str): - arg_format += 's' - value += str(arg) + ',' - elif isinstance(arg, float): - arg_format += 'f' - value += str(arg) + ',' - elif isinstance(arg, bool): - arg_format += 'b' - value += str(arg) + ',' - value = value[:-1] - if len(ndargs) == n_used_vars: - ndargs = [NDArray(_new_empty_handle()) for _ in range(n_mutate_vars)] + ndargs - arg_format = 'n'*n_mutate_vars + arg_format - value = ','*n_mutate_vars + value - elif len(ndargs) == n_mutate_vars + n_used_vars: - pass - else: - raise AssertionError(('Incorrect number of input NDArrays. ' + - 'Need to be either %d (inputs) or %d ' + - '(output buffer) + %d (input)') % - (n_used_vars, n_mutate_vars, n_used_vars)) - - kwargs['format'] = arg_format - kwargs['args'] = value - - for k in kwargs: - kwargs[k] = str(kwargs[k]) - - check_call(_LIB.MXFuncInvokeEx( - handle, - c_handle_array(ndargs[n_mutate_vars:]), # pylint: disable=invalid-slice-index - c_array(mx_float, []), - c_handle_array(ndargs[:n_mutate_vars]), # pylint: disable=invalid-slice-index - ctypes.c_int(len(kwargs)), - c_str_array(kwargs.keys()), - c_str_array(kwargs.values()))) - - if n_mutate_vars == 1: - return ndargs[0] - else: - return ndargs[:n_mutate_vars] # pylint: disable=invalid-slice-index - - # End of function declaration - ret_function = generic_torch_function - ret_function.__name__ = func_name[4:] - ret_function.__doc__ = doc_str - return ret_function - -# pylint: enable=too-many-locals, invalid-name - -def _init_torch_module(): - """List and add all the torch backed ndarray functions to current module.""" - plist = ctypes.POINTER(FunctionHandle)() - size = ctypes.c_uint() - check_call(_LIB.MXListFunctions(ctypes.byref(size), - ctypes.byref(plist))) - - module_obj = sys.modules[__name__] - for i in range(size.value): - hdl = FunctionHandle(plist[i]) - function = _make_torch_function(hdl) - # if function name starts with underscore, register as static method of NDArray - if function is not None: - setattr(module_obj, function.__name__, function) - -# Initialize the NDArray module -_init_torch_module() diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 23049f1b8867..30194494f599 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1638,42 +1638,26 @@ void CreateNDArray(const DataType* shape, *out = nd; } +int MXNDArrayCreate64(const int64_t *shape, + int ndim, + int dev_type, + int dev_id, + int delay_alloc, + int dtype, + NDArrayHandle *out) { + API_BEGIN(); + CreateNDArray(shape, ndim, dev_type, dev_id, delay_alloc, dtype, out); + API_END(); +} + int MXNDArrayCreate(const uint32_t *shape, uint32_t ndim, int dev_type, int dev_id, int delay_alloc, + int dtype, NDArrayHandle *out) { API_BEGIN(); - NDArray* nd = new NDArray(mxnet::TShape(shape, shape + ndim), - Context::Create(static_cast(dev_type), dev_id), - delay_alloc != 0); - nd->AssignStorageInfo(profiler::ProfilerScope::Get()->GetCurrentProfilerScope(), - MXNET_STORAGE_DEFAULT_NAME_CSTR); - *out = nd; - API_END(); -} - -int MXNDArrayCreateEx64(const int64_t *shape, - int ndim, - int dev_type, - int dev_id, - int delay_alloc, - int dtype, - NDArrayHandle *out) { - API_BEGIN(); - CreateNDArray(shape, ndim, dev_type, dev_id, delay_alloc, dtype, out); - API_END(); -} - -int MXNDArrayCreateEx(const uint32_t *shape, - uint32_t ndim, - int dev_type, - int dev_id, - int delay_alloc, - int dtype, - NDArrayHandle *out) { - API_BEGIN(); CreateNDArray(shape, static_cast(ndim), dev_type, dev_id, delay_alloc, dtype, out); API_END(); } @@ -2041,25 +2025,6 @@ int MXNDArrayGetStorageType(NDArrayHandle handle, API_END(); } -int MXNDArrayGetShape(NDArrayHandle handle, - uint32_t *out_dim, - const uint32_t **out_pdata) { - MXAPIThreadLocalEntry<> *ret = MXAPIThreadLocalStore<>::Get(); - API_BEGIN(); - NDArray *arr = static_cast(handle); - if (!arr->is_none()) { - const mxnet::TShape &s = arr->shape(); - *out_dim = s.ndim(); - std::vector& buffer = ret->arg_shape_buffer; - buffer.resize(s.ndim()); - nnvm::ShapeTypeCast(s.begin(), s.end(), buffer.data()); - *out_pdata = buffer.data(); - } else { - *out_dim = 0; - } - API_END(); -} - template inline void GetShape(NDArrayHandle handle, const dtype** out_pdata, int* out_dim, MXAPIThreadLocalEntry* ret) { @@ -2099,18 +2064,18 @@ inline void GetShape(NDArrayHandle handle, const dtype** out_pdata, int* out_dim } } -int MXNDArrayGetShapeEx(NDArrayHandle handle, - int *out_dim, - const int **out_pdata) { +int MXNDArrayGetShape(NDArrayHandle handle, + int *out_dim, + const int **out_pdata) { MXAPIThreadLocalEntry<> *ret = MXAPIThreadLocalStore<>::Get(); API_BEGIN(); GetShape(handle, out_pdata, out_dim, ret); API_END(); } -int MXNDArrayGetShapeEx64(NDArrayHandle handle, - int *out_dim, - const int64_t **out_pdata) { +int MXNDArrayGetShape64(NDArrayHandle handle, + int *out_dim, + const int64_t **out_pdata) { MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); API_BEGIN(); GetShape(handle, out_pdata, out_dim, ret); @@ -2144,13 +2109,8 @@ int MXNDArrayToDLPack(NDArrayHandle handle, } int MXNDArrayFromDLPack(DLManagedTensorHandle dlpack, + const bool transient_handle, NDArrayHandle *out_handle) { - return MXNDArrayFromDLPackEx(dlpack, false, out_handle); -} - -int MXNDArrayFromDLPackEx(DLManagedTensorHandle dlpack, - const bool transient_handle, - NDArrayHandle *out_handle) { API_BEGIN(); *out_handle = new NDArray(NDArray::FromDLPack( static_cast(dlpack), @@ -2310,21 +2270,6 @@ int MXFuncDescribe(FunctionHandle fun, } int MXFuncInvoke(FunctionHandle fun, - NDArrayHandle *use_vars, - float *scalar_args, - NDArrayHandle *mutate_vars) { - API_BEGIN(); - auto *f = static_cast(fun); - f->body((NDArray**)(use_vars), // NOLINT(*) - scalar_args, - (NDArray**)(mutate_vars), // NOLINT(*) - 0, - nullptr, - nullptr); - API_END(); -} - -int MXFuncInvokeEx(FunctionHandle fun, NDArrayHandle *use_vars, float *scalar_args, NDArrayHandle *mutate_vars, @@ -3347,18 +3292,8 @@ int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid, int* shar API_END(); } -int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const uint32_t *shape, - uint32_t ndim, int dtype, NDArrayHandle *out) { - API_BEGIN(); - NDArray* nd = new NDArray(shared_pid, shared_id, mxnet::TShape(shape, shape + ndim), dtype); - nd->AssignStorageInfo(profiler::ProfilerScope::Get()->GetCurrentProfilerScope(), - MXNET_STORAGE_DEFAULT_NAME_CSTR); - *out = nd; - API_END(); -} - -int MXNDArrayCreateFromSharedMemEx(int shared_pid, int shared_id, const int *shape, - int ndim, int dtype, NDArrayHandle *out) { +int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const int *shape, + int ndim, int dtype, NDArrayHandle *out) { API_BEGIN(); NDArray* nd = new NDArray(shared_pid, shared_id, mxnet::TShape(shape, shape + ndim), dtype); nd->AssignStorageInfo(profiler::ProfilerScope::Get()->GetCurrentProfilerScope(), diff --git a/src/c_api/c_api_ndarray.cc b/src/c_api/c_api_ndarray.cc index ebb3134ae7f3..95346e897b56 100644 --- a/src/c_api/c_api_ndarray.cc +++ b/src/c_api/c_api_ndarray.cc @@ -143,22 +143,8 @@ int MXImperativeInvoke(AtomicSymbolCreator creator, NDArrayHandle **outputs, int num_params, const char **param_keys, - const char **param_vals) { - API_BEGIN(); - MXImperativeInvokeImpl(creator, num_inputs, inputs, num_outputs, outputs, - num_params, param_keys, param_vals); - API_END(); -} - -int MXImperativeInvokeEx(AtomicSymbolCreator creator, - int num_inputs, - NDArrayHandle *inputs, - int *num_outputs, - NDArrayHandle **outputs, - int num_params, - const char **param_keys, - const char **param_vals, - const int **out_stypes) { // outputs storage types + const char **param_vals, + const int **out_stypes) { // outputs storage types MXAPIThreadLocalEntry<> *ret = MXAPIThreadLocalStore<>::Get(); API_BEGIN(); MXImperativeInvokeImpl(creator, num_inputs, inputs, num_outputs, outputs, @@ -174,41 +160,11 @@ int MXImperativeInvokeEx(AtomicSymbolCreator creator, } int MXCreateCachedOp(SymbolHandle handle, - CachedOpHandle *out) { - nnvm::Symbol* sym = static_cast(handle); - - API_BEGIN(); - auto inputs = sym->ListInputs(nnvm::Symbol::kAll); - std::vector input_names; - input_names.reserve(inputs.size()); - for (const auto& i : inputs) input_names.push_back(i->attrs.name); - *out = new CachedOpPtr(new CachedOp( - *sym, std::vector >())); - API_END(); -} - -int MXCreateCachedOpEx(SymbolHandle handle, - int num_flags, - const char** keys, - const char** vals, - CachedOpHandle *out) { - nnvm::Symbol* sym = static_cast(handle); - - API_BEGIN(); - std::vector > flags; - for (int i = 0; i < num_flags; ++i) { - flags.emplace_back(keys[i], vals[i]); - } - *out = new CachedOpPtr(new CachedOp(*sym, flags)); - API_END(); -} - -int MXCreateCachedOpEX(SymbolHandle handle, - int num_flags, - const char** keys, - const char** vals, - CachedOpHandle *out, - bool thread_safe) { + int num_flags, + const char** keys, + const char** vals, + CachedOpHandle *out, + bool thread_safe) { nnvm::Symbol* sym = static_cast(handle); API_BEGIN(); std::vector > flags; @@ -243,14 +199,14 @@ int MXCachedOpGetOptimizedSymbol(CachedOpHandle handle, API_END_HANDLE_ERROR(delete s); } -int MXInvokeCachedOpEx(CachedOpHandle handle, - int num_inputs, - NDArrayHandle *inputs, - int default_dev_type, - int default_dev_id, - int *num_outputs, - NDArrayHandle **outputs, - const int **out_stypes) { // outputs storage types +int MXInvokeCachedOp(CachedOpHandle handle, + int num_inputs, + NDArrayHandle *inputs, + int default_dev_type, + int default_dev_id, + int *num_outputs, + NDArrayHandle **outputs, + const int **out_stypes) { // outputs storage types MXAPIThreadLocalEntry<> *ret = MXAPIThreadLocalStore<>::Get(); API_BEGIN(); diff --git a/src/c_api/c_api_profile.cc b/src/c_api/c_api_profile.cc index 79d11b92dff6..bdc7664fc061 100644 --- a/src/c_api/c_api_profile.cc +++ b/src/c_api/c_api_profile.cc @@ -316,12 +316,8 @@ int MXSetProfilerConfig(int num_params, const char* const* keys, const char* con return MXSetProcessProfilerConfig(num_params, keys, vals, nullptr); } -int MXAggregateProfileStatsPrint(const char **out_str, int reset) { - return MXAggregateProfileStatsPrintEx(out_str, reset, 0, 0, 0); -} - -int MXAggregateProfileStatsPrintEx(const char **out_str, int reset, int format, int sort_by, - int ascending) { +int MXAggregateProfileStatsPrint(const char **out_str, int reset, int format, int sort_by, + int ascending) { MXAPIThreadLocalEntry<> *ret = MXAPIThreadLocalStore<>::Get(); API_BEGIN(); CHECK_NOTNULL(out_str); diff --git a/src/c_api/c_api_symbolic.cc b/src/c_api/c_api_symbolic.cc index caee7626c8dd..6f5f03a59a15 100644 --- a/src/c_api/c_api_symbolic.cc +++ b/src/c_api/c_api_symbolic.cc @@ -571,79 +571,6 @@ void MatchArguments( } // namespace mxnet -int MXSymbolInferShape(SymbolHandle sym, - uint32_t num_args, - const char** keys, - const uint32_t *arg_ind_ptr, - const uint32_t *arg_shape_data, - uint32_t *in_shape_size, - const uint32_t **in_shape_ndim, - const uint32_t ***in_shape_data, - uint32_t *out_shape_size, - const uint32_t **out_shape_ndim, - const uint32_t ***out_shape_data, - uint32_t *aux_shape_size, - const uint32_t **aux_shape_ndim, - const uint32_t ***aux_shape_data, - int *complete) { - nnvm::Symbol *s = static_cast(sym); - MXAPIThreadLocalEntry<> *ret = MXAPIThreadLocalStore<>::Get(); - API_BEGIN(); - nnvm::Graph g = Symbol2Graph(*s); - mxnet::ShapeVector arg_shapes(g.indexed_graph().input_nodes().size(), mxnet::TShape()); - if (keys == nullptr && num_args != 0) { - std::vector read_only_args = mxnet::ReadOnlyArgIndices(g.indexed_graph()); - CHECK_LE(num_args, read_only_args.size()); - for (uint32_t i = 0; i < num_args; ++i) { - arg_shapes[read_only_args[i]] = mxnet::ShapeTypeCast( - arg_shape_data + arg_ind_ptr[i], arg_shape_data + arg_ind_ptr[i+1]); - } - } else { - std::unordered_map kwargs; - for (uint32_t i = 0; i < num_args; ++i) { - kwargs[keys[i]] = mxnet::ShapeTypeCast( - arg_shape_data + arg_ind_ptr[i], arg_shape_data + arg_ind_ptr[i+1]); - } - mxnet::MatchArguments(g.indexed_graph(), kwargs, &arg_shapes, "InferShape"); - } - - try { - g = mxnet::exec::InferShape(std::move(g), std::move(arg_shapes), "__shape__"); - } catch (const mxnet::op::InferShapeError &err) { - throw dmlc::Error(err.msg); - } - - // if use legacy shape definition, need to convert numpy shape to legacy shape - mxnet::ShapeVector shapes = g.GetAttr("shape"); - if (!Imperative::Get()->is_np_shape()) { - common::ConvertToLegacyShape(&shapes); - } - - // copy back - CopyAttr(g.indexed_graph(), shapes, - &(ret->arg_shapes), &(ret->out_shapes), &(ret->aux_shapes)); - - // copy data back - MXAPIThreadLocalEntry<>::SetupShapeArrayReturnWithBuffer(ret->arg_shapes, - &(ret->arg_shape_ndim), &(ret->arg_shape_data), &(ret->arg_shape_buffer)); - MXAPIThreadLocalEntry<>::SetupShapeArrayReturnWithBuffer(ret->out_shapes, - &(ret->out_shape_ndim), &(ret->out_shape_data), &(ret->out_shape_buffer)); - MXAPIThreadLocalEntry<>::SetupShapeArrayReturnWithBuffer(ret->aux_shapes, - &(ret->aux_shape_ndim), &(ret->aux_shape_data), &(ret->aux_shape_buffer)); - *in_shape_size = static_cast(ret->arg_shapes.size()); - *in_shape_ndim = dmlc::BeginPtr(ret->arg_shape_ndim); - *in_shape_data = dmlc::BeginPtr(ret->arg_shape_data); - *out_shape_size = static_cast(ret->out_shapes.size()); - *out_shape_ndim = dmlc::BeginPtr(ret->out_shape_ndim); - *out_shape_data = dmlc::BeginPtr(ret->out_shape_data); - *aux_shape_size = static_cast(ret->aux_shapes.size()); - *aux_shape_ndim = dmlc::BeginPtr(ret->aux_shape_ndim); - *aux_shape_data = dmlc::BeginPtr(ret->aux_shape_data); - // mark complete - *complete = (g.GetAttr("shape_num_unknown_nodes") == 0); - API_END(); -} - template inline void SymbolInferShape(const char** keys, uint32_t num_args, @@ -737,21 +664,21 @@ inline void SymbolInferShape(const char** keys, * \param complete indicates completion of Shape Inference * \return 0 when success, -1 when failure happens */ -int MXSymbolInferShapeEx(SymbolHandle sym, - uint32_t num_args, - const char** keys, - const uint32_t *arg_ind_ptr, - const int *arg_shape_data, - uint32_t *in_shape_size, - const int **in_shape_ndim, - const int ***in_shape_data, - uint32_t *out_shape_size, - const int **out_shape_ndim, - const int ***out_shape_data, - uint32_t *aux_shape_size, - const int **aux_shape_ndim, - const int ***aux_shape_data, - int *complete) { +int MXSymbolInferShape(SymbolHandle sym, + uint32_t num_args, + const char** keys, + const uint32_t *arg_ind_ptr, + const int *arg_shape_data, + uint32_t *in_shape_size, + const int **in_shape_ndim, + const int ***in_shape_data, + uint32_t *out_shape_size, + const int **out_shape_ndim, + const int ***out_shape_data, + uint32_t *aux_shape_size, + const int **aux_shape_ndim, + const int ***aux_shape_data, + int *complete) { nnvm::Symbol *s = static_cast(sym); MXAPIThreadLocalEntry<> *ret = MXAPIThreadLocalStore<>::Get(); API_BEGIN(); @@ -795,21 +722,21 @@ int MXSymbolInferShapeEx(SymbolHandle sym, * \param complete indicates completion of Shape Inference * \return 0 when success, -1 when failure happens */ -int MXSymbolInferShapeEx64(SymbolHandle sym, - uint32_t num_args, - const char** keys, - const int64_t *arg_ind_ptr, - const int64_t *arg_shape_data, - size_t *in_shape_size, - const int **in_shape_ndim, - const int64_t ***in_shape_data, - size_t *out_shape_size, - const int **out_shape_ndim, - const int64_t ***out_shape_data, - size_t *aux_shape_size, - const int **aux_shape_ndim, - const int64_t ***aux_shape_data, - int *complete) { +int MXSymbolInferShape64(SymbolHandle sym, + uint32_t num_args, + const char** keys, + const int64_t *arg_ind_ptr, + const int64_t *arg_shape_data, + size_t *in_shape_size, + const int **in_shape_ndim, + const int64_t ***in_shape_data, + size_t *out_shape_size, + const int **out_shape_ndim, + const int64_t ***out_shape_data, + size_t *aux_shape_size, + const int **aux_shape_ndim, + const int64_t ***aux_shape_data, + int *complete) { nnvm::Symbol *s = static_cast(sym); MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); API_BEGIN(); @@ -832,20 +759,41 @@ int MXSymbolInferShapeEx64(SymbolHandle sym, API_END(); } +/*! + * \brief Executor for Symbol Partial Shape Inference + * This api is available when MXNet is built with flag + * USE_INT64_TENSOR_SIZE=0 (by default) + * \param sym symbol handle + * \param num_args number of args + * \param keys keys + * \param arg_ind_ptr arg index pointer + * \param arg_shape_data arg shape data + * \param in_shape_size input shape size + * \param in_shape_ndim input shape number of dims + * \param in_shape_data input shape data + * \param out_shape_size ouput shape size + * \param out_shape_ndim output shape number of dims + * \param out_shape_data output shape data + * \param aux_shape_size shape size of auxiliary states + * \param aux_shape_ndim number of dims of auxiliary states shape + * \param aux_shape_data shape data of auxiliary states + * \param complete indicates completion of Shape Inference + * \return 0 when success, -1 when failure happens + */ int MXSymbolInferShapePartial(SymbolHandle sym, uint32_t num_args, const char** keys, const uint32_t *arg_ind_ptr, - const uint32_t *arg_shape_data, + const int *arg_shape_data, uint32_t *in_shape_size, - const uint32_t **in_shape_ndim, - const uint32_t ***in_shape_data, + const int **in_shape_ndim, + const int ***in_shape_data, uint32_t *out_shape_size, - const uint32_t **out_shape_ndim, - const uint32_t ***out_shape_data, + const int **out_shape_ndim, + const int ***out_shape_data, uint32_t *aux_shape_size, - const uint32_t **aux_shape_ndim, - const uint32_t ***aux_shape_data, + const int **aux_shape_ndim, + const int ***aux_shape_data, int *complete) { int succ = 0; *complete = 1; @@ -860,7 +808,7 @@ int MXSymbolInferShapePartial(SymbolHandle sym, /*! * \brief Executor for Symbol Partial Shape Inference * This api is available when MXNet is built with flag - * USE_INT64_TENSOR_SIZE=0 (by default) + * USE_INT64_TENSOR_SIZE=1 (not default) i.e. Large Tensor Support * \param sym symbol handle * \param num_args number of args * \param keys keys @@ -878,24 +826,24 @@ int MXSymbolInferShapePartial(SymbolHandle sym, * \param complete indicates completion of Shape Inference * \return 0 when success, -1 when failure happens */ -int MXSymbolInferShapePartialEx(SymbolHandle sym, +int MXSymbolInferShapePartial64(SymbolHandle sym, uint32_t num_args, const char** keys, - const uint32_t *arg_ind_ptr, - const int *arg_shape_data, - uint32_t *in_shape_size, + const int64_t *arg_ind_ptr, + const int64_t *arg_shape_data, + size_t *in_shape_size, const int **in_shape_ndim, - const int ***in_shape_data, - uint32_t *out_shape_size, + const int64_t ***in_shape_data, + size_t *out_shape_size, const int **out_shape_ndim, - const int ***out_shape_data, - uint32_t *aux_shape_size, + const int64_t ***out_shape_data, + size_t *aux_shape_size, const int **aux_shape_ndim, - const int ***aux_shape_data, + const int64_t ***aux_shape_data, int *complete) { int succ = 0; *complete = 1; - return MXSymbolInferShapeEx(sym, num_args, keys, + return MXSymbolInferShape64(sym, num_args, keys, arg_ind_ptr, arg_shape_data, in_shape_size, in_shape_ndim, in_shape_data, out_shape_size, out_shape_ndim, out_shape_data, @@ -903,52 +851,6 @@ int MXSymbolInferShapePartialEx(SymbolHandle sym, &succ); } -/*! - * \brief Executor for Symbol Partial Shape Inference - * This api is available when MXNet is built with flag - * USE_INT64_TENSOR_SIZE=1 (not default) i.e. Large Tensor Support - * \param sym symbol handle - * \param num_args number of args - * \param keys keys - * \param arg_ind_ptr arg index pointer - * \param arg_shape_data arg shape data - * \param in_shape_size input shape size - * \param in_shape_ndim input shape number of dims - * \param in_shape_data input shape data - * \param out_shape_size ouput shape size - * \param out_shape_ndim output shape number of dims - * \param out_shape_data output shape data - * \param aux_shape_size shape size of auxiliary states - * \param aux_shape_ndim number of dims of auxiliary states shape - * \param aux_shape_data shape data of auxiliary states - * \param complete indicates completion of Shape Inference - * \return 0 when success, -1 when failure happens - */ -int MXSymbolInferShapePartialEx64(SymbolHandle sym, - uint32_t num_args, - const char** keys, - const int64_t *arg_ind_ptr, - const int64_t *arg_shape_data, - size_t *in_shape_size, - const int **in_shape_ndim, - const int64_t ***in_shape_data, - size_t *out_shape_size, - const int **out_shape_ndim, - const int64_t ***out_shape_data, - size_t *aux_shape_size, - const int **aux_shape_ndim, - const int64_t ***aux_shape_data, - int *complete) { - int succ = 0; - *complete = 1; - return MXSymbolInferShapeEx64(sym, num_args, keys, - arg_ind_ptr, arg_shape_data, - in_shape_size, in_shape_ndim, in_shape_data, - out_shape_size, out_shape_ndim, out_shape_data, - aux_shape_size, aux_shape_ndim, aux_shape_data, - &succ); -} - int MXSymbolInferType(SymbolHandle sym, uint32_t num_args, const char** keys, diff --git a/src/operator/convolution_v1-inl.h b/src/operator/convolution_v1-inl.h deleted file mode 100644 index 0b9981737634..000000000000 --- a/src/operator/convolution_v1-inl.h +++ /dev/null @@ -1,556 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * Copyright (c) 2015 by Contributors - * \file convolution_v1-inl.h - * \brief - * \author Bing Xu -*/ -#ifndef MXNET_OPERATOR_CONVOLUTION_V1_INL_H_ -#define MXNET_OPERATOR_CONVOLUTION_V1_INL_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "./operator_common.h" -#include "./linalg.h" - -namespace mxnet { -namespace op { - -namespace conv_v1 { -enum ConvolutionV1OpInputs {kData, kWeight, kBias}; -enum ConvolutionV1OpOutputs {kOut}; -enum ConvolutionV1OpResource {kTempSpace}; -enum ConvolutionV1OpCudnnTune {kOff, kLimited, kFastest}; -} - -struct ConvolutionV1Param : public dmlc::Parameter { - mxnet::TShape kernel; - mxnet::TShape stride; - mxnet::TShape dilate; - mxnet::TShape pad; - uint32_t num_filter; - uint32_t num_group; - uint64_t workspace; - bool no_bias; - dmlc::optional cudnn_tune; - bool cudnn_off; - dmlc::optional layout; - DMLC_DECLARE_PARAMETER(ConvolutionV1Param) { - DMLC_DECLARE_FIELD(kernel).describe("convolution kernel size: (h, w) or (d, h, w)"); - DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, 0)) - .describe("convolution stride: (h, w) or (d, h, w)"); - DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0, 0)) - .describe("convolution dilate: (h, w) or (d, h, w)"); - DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, 0)) - .describe("pad for convolution: (h, w) or (d, h, w)"); - DMLC_DECLARE_FIELD(num_filter).set_lower_bound(1) - .describe("convolution filter(channel) number"); - DMLC_DECLARE_FIELD(num_group).set_default(1) - .describe("Number of group partitions. Equivalent to slicing input into num_group\n " - "partitions, apply convolution on each, then concatenate the results"); - DMLC_DECLARE_FIELD(workspace).set_default(1024).set_lower_bound(0) - .describe("Maximum temporary workspace allowed for convolution (MB)." - "This parameter determines the effective batch size of the convolution " - "kernel, which may be smaller than the given batch size. " - "Also, the workspace will be automatically enlarged to make sure that we can " - "run the kernel with batch_size=1"); - DMLC_DECLARE_FIELD(no_bias).set_default(false) - .describe("Whether to disable bias parameter."); - DMLC_DECLARE_FIELD(cudnn_tune) - .add_enum("off", conv_v1::kOff) - .add_enum("limited_workspace", conv_v1::kLimited) - .add_enum("fastest", conv_v1::kFastest) - .set_default(dmlc::optional()) - .describe("Whether to pick convolution algo by running performance test.\n " - "Leads to higher startup time but may give faster speed. Options are:\n " - "\'off\': no tuning\n " - "\'limited_workspace\': run test and pick the fastest algorithm " - "that doesn't exceed workspace limit.\n " - "\'fastest\': pick the fastest algorithm and ignore workspace limit.\n " - "If set to None (default), behavior is determined by environment\n " - "variable MXNET_CUDNN_AUTOTUNE_DEFAULT: 0 for off,\n " - "1 for limited workspace (default), 2 for fastest."); - DMLC_DECLARE_FIELD(cudnn_off).set_default(false) - .describe("Turn off cudnn for this layer."); - DMLC_DECLARE_FIELD(layout) - .add_enum("NCHW", mshadow::kNCHW) - .add_enum("NHWC", mshadow::kNHWC) - .add_enum("NCDHW", mshadow::kNCDHW) - .add_enum("NDHWC", mshadow::kNDHWC) - .set_default(dmlc::optional()) - .describe("Set layout for input, output and weight. Empty for\n " - "default layout: NCHW for 2d and NCDHW for 3d."); - } -}; - -template -class ConvolutionV1Op : public Operator { - public: - explicit ConvolutionV1Op(ConvolutionV1Param p) { - this->param_ = p; - // convert MBytes first to Bytes and then to elements. - param_.workspace = (param_.workspace << 20) / sizeof(DType); - CHECK(param_.layout.value() == mshadow::kNCHW || - param_.layout.value() == mshadow::kNCDHW) - << "Only support NCHW and NCDHW layout"; - } - - virtual void Forward(const OpContext &ctx, - const std::vector &in_data, - const std::vector &req, - const std::vector &out_data, - const std::vector &aux_args) { - using namespace mshadow; - using namespace mshadow::expr; - CHECK_EQ(req[conv_v1::kOut], kWriteTo); - size_t expected = param_.no_bias ? 2 : 3; - CHECK_EQ(in_data.size(), expected); - CHECK_EQ(out_data.size(), 1U); - Stream *s = ctx.get_stream(); - if (param_.kernel.ndim() > 2) { - LOG(FATAL) << "Volume convolution is not implmented in mshadow"; - } - Tensor data = in_data[conv_v1::kData].get(s); - Shape<3> wmat_shape = - Shape3(param_.num_group, - param_.num_filter / param_.num_group, - data.shape_[1] / param_.num_group * param_.kernel[0] * param_.kernel[1]); - Tensor wmat = - in_data[conv_v1::kWeight].get_with_shape(wmat_shape, s); - Tensor out = out_data[conv_v1::kOut].get(s); -#if defined(__CUDACC__) - CHECK_EQ(s->blas_handle_ownership_, Stream::OwnHandle) - << "Must init CuBLAS handle in stream"; -#endif - const index_t nbatch = data.size(0); - Tensor workspace = - ctx.requested[conv_v1::kTempSpace].get_space_typed( - Shape1(this->InitTemp(data.shape_, out.shape_)), s); - for (index_t i = 0; i < nbatch; i += nstep_) { - const index_t step = std::min(nstep_, nbatch - i); - Tensor temp_col = Tensor(workspace.dptr_, - Shape2(shape_colunit_[0], - shape_colunit_[1] * step), s); - Tensor temp_dst = Tensor( - workspace.dptr_ + temp_col.shape_.Size(), - Shape3(shape_dstunit_[0], - shape_dstunit_[1], - shape_dstunit_[2] * step), s); - if (param_.pad[0] == 0 && param_.pad[1] == 0) { - temp_col = unpack_patch2col(data.Slice(i, i + step), - param_.kernel[0], - param_.kernel[1], - param_.stride[0], - param_.stride[1], - param_.dilate[0], - param_.dilate[1]); - } else { - temp_col = unpack_patch2col(pad(data.Slice(i, i + step), - param_.pad[0], param_.pad[1]), - param_.kernel[0], - param_.kernel[1], - param_.stride[0], - param_.stride[1], - param_.dilate[0], - param_.dilate[1]); - } - - const index_t gstride = temp_col.size(0) / param_.num_group; - for (uint32_t gid = 0; gid < param_.num_group; ++gid) { - mshadow::Tensor tmpc = temp_col.Slice(gstride * gid, - gstride * (gid + 1)); - // Legacy approach shown here for comparison: - // temp_dst[gid] = dot(wmat[gid], tmpc); - linalg_gemm(wmat[gid], tmpc, temp_dst[gid], false, false, s); - } - out.Slice(i, i + step) = swapaxis<1, 0>(reshape(temp_dst, - mshadow::Shape4(param_.num_filter, - step, - out.size(2), - out.size(3)))); - } - if (!param_.no_bias) { - // add bias, broadcast bias to dim 1: channel - Tensor bias = in_data[conv_v1::kBias].get(s); - out += broadcast<1>(bias, out.shape_); - } - } - - virtual void Backward(const OpContext &ctx, - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &req, - const std::vector &in_grad, - const std::vector &aux_args) { - using namespace mshadow; - using namespace mshadow::expr; - // TODO(bing): check the BLAS Handle, be careful - if (param_.kernel.ndim() > 2) { - LOG(FATAL) << "Volume convolution is not implmented in mshadow"; - } - CHECK_EQ(out_grad.size(), 1); - size_t expected = param_.no_bias == 0 ? 3 : 2; - CHECK(in_data.size() == expected && in_grad.size() == expected); - CHECK_EQ(req.size(), expected); - CHECK_EQ(in_data[conv_v1::kWeight].CheckContiguous(), true); - // get data - Stream *s = ctx.get_stream(); - Tensor data = in_data[conv_v1::kData].get(s); - Shape<3> wmat_shape = - Shape3(param_.num_group, - param_.num_filter / param_.num_group, - data.shape_[1] / param_.num_group * param_.kernel[0] * param_.kernel[1]); - Tensor wmat = - in_data[conv_v1::kWeight].get_with_shape(wmat_shape, s); - Tensor grad = out_grad[conv_v1::kOut].get(s); - Tensor gdata = in_grad[conv_v1::kData].get(s); - Tensor gwmat = - in_grad[conv_v1::kWeight].get_with_shape(wmat_shape, s); -#if defined(__CUDACC__) - CHECK_EQ(s->blas_handle_ownership_, Stream::OwnHandle) - << "Must init CuBLAS handle in stream"; -#endif - const index_t nbatch = data.size(0); - Tensor workspace = - ctx.requested[conv_v1::kTempSpace].get_space_typed( - Shape1(this->InitTemp(data.shape_, grad.shape_)), s); - for (index_t i = 0; i < nbatch; i += nstep_) { - const index_t step = std::min(nstep_, nbatch - i); - Tensor temp_col = Tensor(workspace.dptr_, - Shape2(shape_colunit_[0], - shape_colunit_[1] * step), s); - Tensor temp_dst = Tensor( - workspace.dptr_ + temp_col.shape_.Size(), - Shape3(shape_dstunit_[0], - shape_dstunit_[1], - shape_dstunit_[2] * step), s); - temp_dst = reshape(swapaxis<1, 0>(grad.Slice(i, i + step)), temp_dst.shape_); - if (param_.pad[0] == 0 && param_.pad[1] == 0) { - temp_col = unpack_patch2col(data.Slice(i, i + step), - param_.kernel[0], - param_.kernel[1], - param_.stride[0], - param_.stride[1], - param_.dilate[0], - param_.dilate[1]); - } else { - temp_col = unpack_patch2col(pad(data.Slice(i, i + step), param_.pad[0], param_.pad[1]), - param_.kernel[0], - param_.kernel[1], - param_.stride[0], - param_.stride[1], - param_.dilate[0], - param_.dilate[1]); - } - const index_t gstride = temp_col.size(0) / param_.num_group; - for (uint32_t gid = 0; gid < param_.num_group; ++gid) { - Tensor tmpc = temp_col.Slice(gstride * gid, gstride * (gid + 1)); - if (i == 0) { - Tensor tmp_gwmat = gwmat[gid]; - // Legacy approach shown here for comparison: - // Assign(tmp_gwmat, req[conv_v1::kWeight], dot(temp_dst[gid], tmpc.T())); - linalg_gemm(temp_dst[gid], tmpc, tmp_gwmat, false, true, s, req[conv_v1::kWeight]); - } else { - // Legacy approach shown here for comparison: - // gwmat[gid] += dot(temp_dst[gid], tmpc.T()); - linalg_gemm(temp_dst[gid], tmpc, gwmat[gid], false, true, s, kAddTo); - } - } - - for (uint32_t gid = 0; gid < param_.num_group; ++gid) { - Tensor tmpc = temp_col.Slice(gstride * gid, gstride * (gid + 1)); - // Legacy approach shown here for comparison: - // tmpc = dot(wmat[gid].T(), temp_dst[gid]); - linalg_gemm(wmat[gid], temp_dst[gid], tmpc, true, false, s); - } - if (param_.pad[0] == 0 && param_.pad[1] == 0) { - Assign(gdata.Slice(i, i + step), req[conv_v1::kData], - pack_col2patch(temp_col, - data.Slice(i, i + step).shape_, - param_.kernel[0], - param_.kernel[1], - param_.stride[0], - param_.stride[1], - param_.dilate[0], - param_.dilate[1])); - } else { - Shape<4> pshape = data.Slice(i, i + step).shape_; - pshape[2] += 2 * param_.pad[0]; - pshape[3] += 2 * param_.pad[1]; - Assign(gdata.Slice(i, i + step), req[conv_v1::kData], - crop(pack_col2patch(temp_col, - pshape, - param_.kernel[0], - param_.kernel[1], - param_.stride[0], - param_.stride[1], - param_.dilate[0], - param_.dilate[1]), - gdata[i][0].shape_)); - } - } - if (!param_.no_bias) { - Tensor gbias = in_grad[conv_v1::kBias].get(s); - Assign(gbias, req[conv_v1::kBias], sumall_except_dim<1>(grad)); - } - } - - private: - inline index_t InitTemp(const mshadow::Shape<4> &ishape, - const mshadow::Shape<4> &oshape) { - const int ksize_y = param_.kernel[0]; - const int ksize_x = param_.kernel[1]; - shape_colunit_ = mshadow::Shape2(ishape[1] * ksize_y * ksize_x, - oshape[2] * oshape[3]); - shape_dstunit_ = mshadow::Shape3(param_.num_group, - param_.num_filter / param_.num_group, - oshape[2] * oshape[3]); - // param_.workspace is in elements of sizeof(DType) - // if param_.workspace is set to zero the nstep_ equals ishape[0] (batch) - nstep_ = std::max( - std::min(param_.workspace / - (shape_colunit_.Size() + shape_dstunit_.Size()), ishape[0]), - 1); - - mshadow::Shape<2> scol = mshadow::Shape2(shape_colunit_[0], - shape_colunit_[1] * nstep_); - mshadow::Shape<3> sdst = mshadow::Shape3(shape_dstunit_[0], - shape_dstunit_[1], - shape_dstunit_[2] * nstep_); - index_t required_size = scol.Size() + sdst.Size(); - return required_size; - } - - ConvolutionV1Param param_; - mshadow::Shape<2> shape_colunit_; - mshadow::Shape<3> shape_dstunit_; - index_t nstep_; -}; // class ConvolutionV1Op - -template -Operator* CreateOp(ConvolutionV1Param param, int dtype, - mxnet::ShapeVector *in_shape, - mxnet::ShapeVector *out_shape, - Context ctx); - -#if DMLC_USE_CXX11 -class ConvolutionV1Prop : public OperatorProperty { - public: - std::vector ListArguments() const override { - if (!param_.no_bias) { - return {"data", "weight", "bias"}; - } else { - return {"data", "weight"}; - } - } - - void Init(const std::vector >& kwargs) override { - using namespace mshadow; - param_.Init(kwargs); - if (param_.kernel.ndim() == 2) { - param_.layout = param_.layout ? param_.layout.value() : mshadow::kNCHW; - if (param_.stride.ndim() == 0) param_.stride = Shape2(1, 1); - if (param_.dilate.ndim() == 0) param_.dilate = Shape2(1, 1); - if (param_.pad.ndim() == 0) param_.pad = Shape2(0, 0); - } else { - CHECK_EQ(param_.kernel.ndim(), 3U) << param_.kernel.ndim() << "D convolution not supported"; - param_.layout = param_.layout ? param_.layout.value(): mshadow::kNCDHW; - if (param_.stride.ndim() == 0) param_.stride = Shape3(1, 1, 1); - if (param_.dilate.ndim() == 0) param_.dilate = Shape3(1, 1, 1); - if (param_.pad.ndim() == 0) param_.pad = Shape3(0, 0, 0); - } - } - - std::map GetParams() const override { - return param_.__DICT__(); - } - - bool InferShape(mxnet::ShapeVector *in_shape, - mxnet::ShapeVector *out_shape, - mxnet::ShapeVector *aux_shape) const override { - using namespace mshadow; - if (!param_.no_bias) { - CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]"; - } else { - CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]"; - } - // CHECK_EQ(out_shape->size(), 1) << "Output: [output]"; - out_shape->resize(1, mxnet::TShape()); - const mxnet::TShape &dshp = (*in_shape)[conv_v1::kData]; - if (!mxnet::ndim_is_known(dshp)) return false; - if (param_.kernel.ndim() == 2) { - // 2d conv_v1 - CHECK_EQ(dshp.ndim(), 4U) \ - << "Input data should be 4D in batch-num_filter-y-x"; - Shape<4> dshape = ConvertLayout(dshp.get<4>(), param_.layout.value(), kNCHW); - Shape<4> wshape = Shape4(param_.num_filter / param_.num_group, dshape[1] / param_.num_group, - param_.kernel[0], param_.kernel[1]); - wshape = ConvertLayout(wshape, kNCHW, param_.layout.value()); - wshape[0] *= param_.num_group; - SHAPE_ASSIGN_CHECK(*in_shape, conv_v1::kWeight, wshape); - if (!param_.no_bias) { - SHAPE_ASSIGN_CHECK(*in_shape, conv_v1::kBias, Shape1(param_.num_filter)); - } - - const index_t ksize_y = static_cast(param_.kernel[0]); - const index_t ksize_x = static_cast(param_.kernel[1]); - CHECK_EQ(dshape[1] % param_.num_group, 0) \ - << "input num_filter must divide group size"; - CHECK_EQ(param_.num_filter % param_.num_group, 0) \ - << "output num_filter must divide group size"; - CHECK_GT(param_.kernel.Size(), 0) \ - << "incorrect kernel size: " << param_.kernel; - CHECK_GT(param_.stride.Size(), 0) \ - << "incorrect stride size: " << param_.stride; - CHECK_GT(param_.dilate.Size(), 0) \ - << "incorrect dilate size: " << param_.dilate; - CHECK(ksize_y <= dshape[2] + 2 * param_.pad[0] - && ksize_x <= dshape[3] + 2 * param_.pad[1]) - << "kernel size exceed input"; - Shape<4> oshape; - oshape[0] = dshape[0]; - oshape[1] = param_.num_filter; - oshape[2] = (dshape[2] + 2 * param_.pad[0] - - (param_.dilate[0] * (ksize_y - 1) + 1)) / param_.stride[0] + 1; - oshape[3] = (dshape[3] + 2 * param_.pad[1] - - (param_.dilate[1] * (ksize_x - 1) + 1)) / param_.stride[1] + 1; - SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCHW, param_.layout.value())); - return true; - } else if (param_.kernel.ndim() == 3) { - // 3d conv_v1 - CHECK_EQ(dshp.ndim(), 5U) \ - << "Input data should be 5D in batch-num_filter-depth-y-x"; - Shape<5> dshape = ConvertLayout(dshp.get<5>(), param_.layout.value(), kNCDHW); - Shape<5> wshape = Shape5(param_.num_filter / param_.num_group, dshape[1] / param_.num_group, - param_.kernel[0], param_.kernel[1], param_.kernel[2]); - wshape = ConvertLayout(wshape, kNCDHW, param_.layout.value()); - wshape[0] *= param_.num_group; - SHAPE_ASSIGN_CHECK(*in_shape, conv_v1::kWeight, wshape); - if (!param_.no_bias) { - SHAPE_ASSIGN_CHECK(*in_shape, conv_v1::kBias, Shape1(param_.num_filter)); - } - - const index_t ksize_d = static_cast(param_.kernel[0]); - const index_t ksize_y = static_cast(param_.kernel[1]); - const index_t ksize_x = static_cast(param_.kernel[2]); - CHECK_EQ(dshape[1] % param_.num_group, 0) - << "input num_filter must divide group size"; - CHECK_EQ(param_.num_filter % param_.num_group, 0) - << "output num_filter must divide group size"; - CHECK_GT(param_.kernel.Size(), 0) \ - << "incorrect kernel size: " << param_.kernel; - CHECK_GT(param_.stride.Size(), 0) \ - << "incorrect stride size: " << param_.stride; - CHECK_GT(param_.dilate.Size(), 0) \ - << "incorrect dilate size: " << param_.dilate; - CHECK(ksize_d <= dshape[2] + 2 * param_.pad[0] - && ksize_y <= dshape[3] + 2 * param_.pad[1] - && ksize_x <= dshape[4] + 2 * param_.pad[2]) - << "kernel size exceed input"; - CHECK_EQ(param_.dilate.Size(), 1U) - << "Dilate is not supported in 3d convolution"; - Shape<5> oshape; - oshape[0] = dshape[0]; - oshape[1] = param_.num_filter; - oshape[2] = (dshape[2] + 2 * param_.pad[0] - - (1 * (ksize_d - 1) + 1)) / param_.stride[0] + 1; - oshape[3] = (dshape[3] + 2 * param_.pad[1] - - (1 * (ksize_y - 1) + 1)) / param_.stride[1] + 1; - oshape[4] = (dshape[4] + 2 * param_.pad[2] - - (1 * (ksize_x - 1) + 1)) / param_.stride[2] + 1; - SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCDHW, param_.layout.value())); - return true; - } else { - LOG(FATAL) << "Unknown convolution type"; - return false; - } - } - - bool InferType(std::vector *in_type, - std::vector *out_type, - std::vector *aux_type) const override { - CHECK_GE(in_type->size(), 1); - int dtype = (*in_type)[0]; - CHECK_NE(dtype, -1) << "First input must have specified type"; - for (size_t i = 0; i < in_type->size(); ++i) { - if ((*in_type)[i] == -1) { - (*in_type)[i] = dtype; - } else { - UNIFORM_TYPE_CHECK((*in_type)[i], dtype, ListArguments()[i]); - } - } - out_type->clear(); - out_type->push_back(dtype); - return true; - } - - OperatorProperty* Copy() const override { - auto ptr = new ConvolutionV1Prop(); - ptr->param_ = param_; - return ptr; - } - - std::string TypeString() const override { - return "Convolution_v1"; - } - - std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const override { - return {out_grad[conv_v1::kOut], in_data[conv_v1::kData], in_data[conv_v1::kWeight]}; - } - - std::vector ForwardResource( - const mxnet::ShapeVector &in_shape) const override { - return {ResourceRequest::kTempSpace}; - } - - std::vector BackwardResource( - const mxnet::ShapeVector &in_shape) const override { - return {ResourceRequest::kTempSpace}; - } - - Operator* CreateOperator(Context ctx) const override { - LOG(FATAL) << "Not Implemented."; - return nullptr; - } - - Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, - std::vector *in_type) const override; - - private: - ConvolutionV1Param param_; -}; // class ConvolutionV1Prop -#endif // DMLC_USE_CXX11 -} // namespace op -} // namespace mxnet -#endif // MXNET_OPERATOR_CONVOLUTION_V1_INL_H_ diff --git a/src/operator/convolution_v1.cc b/src/operator/convolution_v1.cc deleted file mode 100644 index 5d1ce3108a3f..000000000000 --- a/src/operator/convolution_v1.cc +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * Copyright (c) 2015 by Contributors - * \file convolution_v1.cc - * \brief - * \author Bing Xu -*/ - -#include "./convolution_v1-inl.h" -namespace mxnet { -namespace op { -DMLC_REGISTER_PARAMETER(ConvolutionV1Param); - -template<> -Operator* CreateOp(ConvolutionV1Param param, int dtype, - mxnet::ShapeVector *in_shape, - mxnet::ShapeVector *out_shape, - Context ctx) { - Operator *op = nullptr; - MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { - op = new ConvolutionV1Op(param); - }) - return op; -} - -// DO_BIND_DISPATCH comes from operator_common.h -Operator *ConvolutionV1Prop::CreateOperatorEx(Context ctx, - mxnet::ShapeVector *in_shape, - std::vector *in_type) const { - mxnet::ShapeVector out_shape, aux_shape; - std::vector out_type, aux_type; - CHECK(InferType(in_type, &out_type, &aux_type)); - CHECK(InferShape(in_shape, &out_shape, &aux_shape)); - DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], in_shape, &out_shape, ctx); -} - -MXNET_REGISTER_OP_PROPERTY(Convolution_v1, ConvolutionV1Prop) -.add_argument("data", "NDArray-or-Symbol", "Input data to the ConvolutionV1Op.") -.add_argument("weight", "NDArray-or-Symbol", "Weight matrix.") -.add_argument("bias", "NDArray-or-Symbol", "Bias parameter.") -.add_arguments(ConvolutionV1Param::__FIELDS__()) -.describe("This operator is DEPRECATED." - " Apply convolution to input then add a bias."); - -} // namespace op -} // namespace mxnet diff --git a/src/operator/convolution_v1.cu b/src/operator/convolution_v1.cu deleted file mode 100644 index 0f40c30eeb1d..000000000000 --- a/src/operator/convolution_v1.cu +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * Copyright (c) 2015 by Contributors - * \file convolution_v1.cu - * \brief - * \author Bing Xu -*/ - -#include "./convolution_v1-inl.h" -#include -#if MXNET_USE_CUDNN == 1 -#include "./nn/cudnn/cudnn_convolution-inl.h" -#endif // MXNET_USE_CUDNN - -namespace mxnet { -namespace op { -template<> -Operator* CreateOp(ConvolutionV1Param param, int dtype, - mxnet::ShapeVector *in_shape, - mxnet::ShapeVector *out_shape, - Context ctx) { - Operator *op = nullptr; - MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { - op = new ConvolutionV1Op(param); - }) - return op; -} - -} // namespace op -} // namespace mxnet - diff --git a/src/operator/pooling_v1-inl.h b/src/operator/pooling_v1-inl.h deleted file mode 100644 index 6c7845d9d33b..000000000000 --- a/src/operator/pooling_v1-inl.h +++ /dev/null @@ -1,378 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * Copyright (c) 2015 by Contributors - * \file pooling_v1-inl.h - * \brief - * \author Bing Xu -*/ - -#ifndef MXNET_OPERATOR_POOLING_V1_INL_H_ -#define MXNET_OPERATOR_POOLING_V1_INL_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "./operator_common.h" - -namespace mxnet { -namespace op { - -namespace pool_v1_enum { -enum PoolingV1OpInputs {kData}; -enum PoolingV1OpOutputs {kOut}; -enum PoolingV1OpType {kMaxPooling, kAvgPooling, kSumPooling}; -enum PoolingV1OpPadConventionType {kValid, kFull}; -} // namespace pool_v1_enum - -struct PoolingV1Param : public dmlc::Parameter { - mxnet::TShape kernel; - mxnet::TShape stride; - mxnet::TShape pad; - int pool_type; - int pooling_convention; - bool global_pool; - DMLC_DECLARE_PARAMETER(PoolingV1Param) { - DMLC_DECLARE_FIELD(kernel).set_default(mxnet::TShape(0, -1)) - .enforce_nonzero() - .describe("pooling kernel size: (y, x) or (d, y, x)"); - - DMLC_DECLARE_FIELD(pool_type).set_default(pool_v1_enum::kMaxPooling) - .add_enum("max", pool_v1_enum::kMaxPooling) - .add_enum("avg", pool_v1_enum::kAvgPooling) - .add_enum("sum", pool_v1_enum::kSumPooling) - .describe("Pooling type to be applied."); - - DMLC_DECLARE_FIELD(global_pool).set_default(false) - .describe("Ignore kernel size, do global pooling based on current input feature map. "); - - DMLC_DECLARE_FIELD(pooling_convention).set_default(pool_v1_enum::kValid) - .add_enum("full", pool_v1_enum::kFull) - .add_enum("valid", pool_v1_enum::kValid) - .describe("Pooling convention to be applied."); - - DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, -1)) - .enforce_nonzero() - .describe("stride: for pooling (y, x) or (d, y, x)"); - - DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, -1)) - .describe("pad for pooling: (y, x) or (d, y, x)"); - } -}; - -template -class PoolingV1Op : public Operator { - public: - explicit PoolingV1Op(PoolingV1Param p) { - this->param_ = p; - } - - virtual void Forward(const OpContext &ctx, - const std::vector &in_data, - const std::vector &req, - const std::vector &out_data, - const std::vector &aux_args) { - using namespace mshadow; - using namespace mshadow::expr; - CHECK_EQ(in_data.size(), 1); - CHECK_EQ(out_data.size(), 1); - Stream *s = ctx.get_stream(); - if (param_.kernel.ndim() == 3) { - LOG(FATAL) << "3D kernel not implemented"; - } - - // reset padding size for global pooling - mxnet::TShape padding = param_.pad; - // mxnet::TShape kernel = param_.kernel; - if (param_.global_pool) { - padding[0] = padding[1] = 0; - // kernel[0] = kernel[1] = 0; - } - - Tensor data = in_data[pool_v1_enum::kData].get(s); - Tensor out = out_data[pool_v1_enum::kOut].get(s); - mshadow::Shape<2> out_shape = Shape2(out.shape_[2], out.shape_[3]); - if (param_.pool_type == pool_v1_enum::kMaxPooling - || param_.pool_type == pool_v1_enum::kSumPooling) { - Assign(out, - req[pool_v1_enum::kOut], - pool(pad(data, padding[0], padding[1]), - out_shape, - param_.global_pool ? data.shape_[2] : param_.kernel[0], - param_.global_pool ? data.shape_[3] : param_.kernel[1], - param_.global_pool ? 1 : param_.stride[0], - param_.global_pool ? 1 : param_.stride[1])); - } else if (param_.pool_type == pool_v1_enum::kAvgPooling) { - Assign(out, - req[pool_v1_enum::kOut], - scalar(1.0f / (param_.global_pool ? - data.shape_[2] * data.shape_[3] : - param_.kernel[0] * param_.kernel[1])) * \ - pool(pad(data, padding[0], padding[1]), - out_shape, - param_.global_pool ? data.shape_[2] : param_.kernel[0], - param_.global_pool ? data.shape_[3] : param_.kernel[1], - param_.global_pool ? 1 : param_.stride[0], - param_.global_pool ? 1 : param_.stride[1])); - } - } - - virtual void Backward(const OpContext &ctx, - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &req, - const std::vector &in_grad, - const std::vector &aux_args) { - using namespace mshadow; - using namespace mshadow::expr; - CHECK_EQ(out_grad.size(), 1); - CHECK_EQ(in_data.size(), 1); - CHECK_EQ(out_data.size(), 1); - CHECK_EQ(req.size(), 1); - CHECK_EQ(in_grad.size(), 1); - // TODO(bing): remove pad (0,0) - if (param_.kernel.ndim() == 3) { - LOG(FATAL) << "3D kernel not implemented"; - } - - // reset padding size for global pooling - mxnet::TShape padding = param_.pad; - if (param_.global_pool) { - padding[0] = padding[1] = 0; - } - - Stream *s = ctx.get_stream(); - Tensor grad = out_grad[pool_v1_enum::kOut].get(s); - Tensor data = in_data[pool_v1_enum::kData].get(s); - Tensor output_data = out_data[pool_v1_enum::kOut].get(s); - Tensor input_grad = in_grad[pool_v1_enum::kData].get(s); - - mshadow::Shape<2> in_shape = Shape2(data.shape_[2], data.shape_[3]); - - if (param_.pool_type == pool_v1_enum::kMaxPooling - || param_.pool_type == pool_v1_enum::kSumPooling) { - Assign(input_grad, req[pool_v1_enum::kData], - crop(unpool(pad(data, padding[0], padding[1]), - pad(output_data, 0, 0), - pad(grad, 0, 0), - param_.global_pool ? in_shape[0] : param_.kernel[0], - param_.global_pool ? in_shape[1] : param_.kernel[1], - param_.global_pool ? 1 : param_.stride[0], - param_.global_pool ? 1 : param_.stride[1]), - in_shape, - padding[0], - padding[1])); - } else if (param_.pool_type == pool_v1_enum::kAvgPooling) { - Assign(input_grad, req[pool_v1_enum::kData], - scalar(1.0f / (param_.global_pool ? - data.shape_[2] * data.shape_[3] : - param_.kernel[0] * param_.kernel[1])) * \ - crop(unpool(pad(data, padding[0], padding[1]), - pad(output_data, 0, 0), - pad(grad, 0, 0), - param_.global_pool ? in_shape[0] : param_.kernel[0], - param_.global_pool ? in_shape[1] : param_.kernel[1], - param_.global_pool ? 1 : param_.stride[0], - param_.global_pool ? 1 : param_.stride[1]), - in_shape, - padding[0], - padding[1])); - } - } - - private: - PoolingV1Param param_; -}; // class PoolingV1Op - -template -Operator* CreateOp(PoolingV1Param param, int dtype); - - -#if DMLC_USE_CXX11 -class PoolingV1Prop : public OperatorProperty { - public: - void Init(const std::vector >& kwargs) override { - using namespace mshadow; - param_.Init(kwargs); - if (param_.kernel.ndim() == 1) { - if (param_.stride.ndim() == 0) param_.stride = Shape1(1); - if (param_.pad.ndim() == 0) param_.pad = Shape1(0); - } else if (param_.kernel.ndim() == 2) { - if (param_.stride.ndim() == 0) param_.stride = Shape2(1, 1); - if (param_.pad.ndim() == 0) param_.pad = Shape2(0, 0); - } else { - // ignore kernel size only if global_pool not assigned false - if (param_.global_pool == false) { - CHECK_EQ(param_.kernel.ndim(), 3U) << param_.kernel.ndim() - << "D pooling not supported"; - } - if (param_.stride.ndim() == 0) param_.stride = Shape3(1, 1, 1); - if (param_.pad.ndim() == 0) param_.pad = Shape3(0, 0, 0); - } - } - - std::map GetParams() const override { - return param_.__DICT__(); - } - - bool InferShape(mxnet::ShapeVector *in_shape, - mxnet::ShapeVector *out_shape, - mxnet::ShapeVector *aux_shape) const override { - CHECK_EQ(in_shape->size(), 1U); - const mxnet::TShape &dshape = (*in_shape)[0]; - CHECK_GE(dshape.ndim(), 4) << "Pooling: Input data should be 4D in (batch, channel, y, x) " - << "Or 5D in (batch, channel, d, y, x)"; - CHECK_LE(dshape.ndim(), 5) << "Pooling: Input data should be 4D in (batch, channel, y, x) " - << "Or 5D in (batch, channel, d, y, x)"; - mxnet::TShape oshape = dshape; - if (dshape.ndim() == -1) return false; - if (param_.global_pool) { - if (dshape.ndim() == 4) { - oshape[2] = 1; - oshape[3] = 1; - } else { - oshape[2] = 1; - oshape[3] = 1; - oshape[4] = 1; - } - out_shape->clear(); - out_shape->push_back(oshape); - } else if (param_.kernel.ndim() == 2) { - CHECK_EQ(dshape.ndim(), 4) << "Pooling: Input data should be 4D in (batch, channel, y, x)"; - CHECK(param_.kernel[0] <= dshape[2] + 2 * param_.pad[0]) - << "kernel size (" << param_.kernel[0] << ") exceeds input (" << dshape[2] - << " padded to " << (dshape[2] + 2*param_.pad[0]) << ")"; - CHECK(param_.kernel[1] <= dshape[3] + 2 * param_.pad[1]) - << "kernel size (" << param_.kernel[1] << ") exceeds input (" << dshape[3] - << " padded to " << (dshape[3] + 2*param_.pad[1]) << ")"; - if (param_.pooling_convention == pool_v1_enum::kValid) { - oshape[2] = 1 + (dshape[2] + 2 * param_.pad[0] - param_.kernel[0]) / - param_.stride[0]; - oshape[3] = 1 + (dshape[3] + 2 * param_.pad[1] - param_.kernel[1]) / - param_.stride[1]; - } else { - oshape[2] = 1 + static_cast(std::ceil(static_cast( - dshape[2] + 2 * param_.pad[0] - - param_.kernel[0]) / param_.stride[0])); - oshape[3] = 1 + static_cast(std::ceil(static_cast( - dshape[3] + 2 * param_.pad[1] - - param_.kernel[1]) / param_.stride[1])); - } - - out_shape->clear(); - out_shape->push_back(oshape); - } else if (param_.kernel.ndim() == 3) { - CHECK_EQ(dshape.ndim(), 5) << "Pooling: Input data should be 5D in (batch, channel, d, y, x)"; - CHECK_LE(param_.kernel[0], dshape[2] + 2 * param_.pad[0]) << "kernel size exceeds input"; - CHECK_LE(param_.kernel[1], dshape[3] + 2 * param_.pad[1]) << "kernel size exceeds input"; - CHECK_LE(param_.kernel[2], dshape[4] + 2 * param_.pad[2]) << "kernel size exceeds input"; - if (param_.pooling_convention == pool_v1_enum::kValid) { - oshape[2] = 1 + (dshape[2] + 2 * param_.pad[0] - param_.kernel[0]) / - param_.stride[0]; - oshape[3] = 1 + (dshape[3] + 2 * param_.pad[1] - param_.kernel[1]) / - param_.stride[1]; - oshape[4] = 1 + (dshape[4] + 2 * param_.pad[2] - param_.kernel[2]) / - param_.stride[2]; - } else { - oshape[2] = 1 + static_cast(std::ceil(static_cast( - dshape[2] + 2 * param_.pad[0] - - param_.kernel[0]) / param_.stride[0])); - oshape[3] = 1 + static_cast(std::ceil(static_cast( - dshape[3] + 2 * param_.pad[1] - - param_.kernel[1]) / param_.stride[1])); - oshape[4] = 1 + static_cast(std::ceil(static_cast( - dshape[4] + 2 * param_.pad[2] - - param_.kernel[2]) / param_.stride[2])); - } - - out_shape->clear(); - out_shape->push_back(oshape); - } - return true; - } - - bool InferType(std::vector *in_type, - std::vector *out_type, - std::vector *aux_type) const override { - CHECK_EQ(in_type->size(), 1); - int dtype = (*in_type)[0]; - - if (dtype == -1) { - LOG(FATAL) << "Input type to pooling is not specified."; - return false; - } - - out_type->clear(); - out_type->push_back(dtype); - return true; - } - - OperatorProperty* Copy() const override { - PoolingV1Prop *prop_sym = new PoolingV1Prop(); - prop_sym->param_ = this->param_; - return prop_sym; - } - - std::string TypeString() const override { - return "Pooling_v1"; - } - - std::vector DeclareBackwardDependency( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data) const override { - return {out_grad[pool_v1_enum::kOut], in_data[pool_v1_enum::kData], - out_data[pool_v1_enum::kOut]}; - } - - std::vector > BackwardInplaceOption( - const std::vector &out_grad, - const std::vector &in_data, - const std::vector &out_data, - const std::vector &in_grad) const override { -#if MXNET_USE_CUDNN == 1 - return {}; -#else - return {{in_data[pool_v1_enum::kData], in_grad[pool_v1_enum::kData]}}; -#endif - } - - Operator* CreateOperator(Context ctx) const override { - LOG(FATAL) << "Not Implemented."; - return nullptr; - } - - Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, - std::vector *in_type) const override; - - private: - PoolingV1Param param_; -}; // class PoolingV1Prop -#endif // DMLC_USE_CXX11 -} // namespace op -} // namespace mxnet - -#endif // MXNET_OPERATOR_POOLING_V1_INL_H_ diff --git a/src/operator/pooling_v1.cc b/src/operator/pooling_v1.cc deleted file mode 100644 index 9e350e88c9ee..000000000000 --- a/src/operator/pooling_v1.cc +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * Copyright (c) 2015 by Contributors - * \file pooling_v1.cc - * \brief - * \author Bing Xu -*/ -#include "./pooling_v1-inl.h" - -namespace mxnet { -namespace op { - -template<> -Operator *CreateOp(PoolingV1Param param, int dtype) { - Operator *op = nullptr; - MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { - switch (param.pool_type) { - case pool_v1_enum::kMaxPooling: - op = new PoolingV1Op(param); - break; - case pool_v1_enum::kAvgPooling: - op = new PoolingV1Op(param); - break; - case pool_v1_enum::kSumPooling: - op = new PoolingV1Op(param); - break; - default: - LOG(FATAL) << "unknown pooling type"; - return nullptr; - } - }) - - return op; -} - -// DO_BIND_DISPATCH comes from operator_common.h -Operator* PoolingV1Prop::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, - std::vector *in_type) const { - mxnet::ShapeVector out_shape, aux_shape; - std::vector out_type, aux_type; - CHECK(InferType(in_type, &out_type, &aux_type)); - CHECK(InferShape(in_shape, &out_shape, &aux_shape)); - DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); -} - -DMLC_REGISTER_PARAMETER(PoolingV1Param); - -MXNET_REGISTER_OP_PROPERTY(Pooling_v1, PoolingV1Prop) -.describe(R"code(This operator is DEPRECATED. -Perform pooling on the input. - -The shapes for 2-D pooling is - -- **data**: *(batch_size, channel, height, width)* -- **out**: *(batch_size, num_filter, out_height, out_width)*, with:: - - out_height = f(height, kernel[0], pad[0], stride[0]) - out_width = f(width, kernel[1], pad[1], stride[1]) - -The definition of *f* depends on ``pooling_convention``, which has two options: - -- **valid** (default):: - - f(x, k, p, s) = floor((x+2*p-k)/s)+1 - -- **full**, which is compatible with Caffe:: - - f(x, k, p, s) = ceil((x+2*p-k)/s)+1 - -But ``global_pool`` is set to be true, then do a global pooling, namely reset -``kernel=(height, width)``. - -Three pooling options are supported by ``pool_type``: - -- **avg**: average pooling -- **max**: max pooling -- **sum**: sum pooling - -1-D pooling is special case of 2-D pooling with *weight=1* and -*kernel[1]=1*. - -For 3-D pooling, an additional *depth* dimension is added before -*height*. Namely the input data will have shape *(batch_size, channel, depth, -height, width)*. - -)code" ADD_FILELINE) -.add_argument("data", "NDArray-or-Symbol", "Input data to the pooling operator.") -.add_arguments(PoolingV1Param::__FIELDS__()); - -} // namespace op -} // namespace mxnet diff --git a/src/operator/pooling_v1.cu b/src/operator/pooling_v1.cu deleted file mode 100644 index f648a7c1f909..000000000000 --- a/src/operator/pooling_v1.cu +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * Copyright (c) 2015 by Contributors - * \file pooling_v1.cu - * \brief - * \author Bing Xu -*/ -#include -#include "./pooling_v1-inl.h" - -namespace mxnet { -namespace op { -template<> -Operator *CreateOp(PoolingV1Param param, int dtype) { - Operator *op = nullptr; - MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { - switch (param.pool_type) { - case pool_v1_enum::kMaxPooling: - op = new PoolingV1Op(param); - break; - case pool_v1_enum::kAvgPooling: - op = new PoolingV1Op(param); - break; - case pool_v1_enum::kSumPooling: - op = new PoolingV1Op(param); - break; - default: - LOG(FATAL) << "unknown pooling type"; - return nullptr; - } - }); - return op; -} - -} // namespace op -} // namespace mxnet - diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index 519c02f141e9..37e0eedafc05 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -727,16 +727,12 @@ def test_2D_with_width(width, grad_req): def test_convolution_versions(): # 2D convolution NCHW ctx_list = [{'ctx': mx.cpu(0), 'conv_data': (2, 2, 7, 7), 'type_dict': {'conv_data': np.float32}}, - {'ctx': mx.gpu(0), 'conv_data': (2, 2, 7, 7), 'type_dict': {'conv_data': np.float32}}, - {'ctx': mx.gpu(0), 'conv_data': (2, 2, 7, 7), 'type_dict': {'conv_data': np.float32}}, {'ctx': mx.cpu(0), 'conv_data': (2, 2, 7, 7), 'type_dict': {'conv_data': np.float32}}, {'ctx': mx.gpu(0), 'conv_data': (2, 2, 7, 7), 'type_dict': {'conv_data': np.float32}}] - conv_v1_cpu = mx.sym.Convolution_v1(num_filter=3, kernel=(3,3), pad=(1,1), name='conv') - conv_v1_gpu = mx.sym.Convolution_v1(num_filter=3, kernel=(3,3), pad=(1,1), cudnn_off=True, name='conv') conv_cudnn = mx.sym.Convolution(num_filter=3, kernel=(3,3), pad=(1,1), name='conv') conv_cpu = mx.sym.Convolution(num_filter=3, kernel=(3,3), pad=(1,1), name='conv') conv_gpu = mx.sym.Convolution(num_filter=3, kernel=(3,3), pad=(1,1), cudnn_off=True, name='conv') - syms = [conv_v1_cpu, conv_v1_gpu, conv_cudnn, conv_cpu, conv_gpu] + syms = [conv_cudnn, conv_cpu, conv_gpu] check_consistency(syms, ctx_list) # 3D convolution NCDHW @@ -1043,30 +1039,27 @@ def test_pooling_versions_helper(pool_op_list, data, kernel, pool_type, pad, str if not is_default_stride(stride) or random_choice(): pool_op_args.update({'stride' : stride}) - expected_pool_ops = ['pool', 'pool_transposed', 'pool_v1'] - if pool_op == 'pool_v1': - sym = mx.sym.Pooling_v1(**pool_op_args) + expected_pool_ops = ['pool', 'pool_transposed'] + pool_op_args.update({'p_value' : p_value, 'count_include_pad' : count_include_pad}) + if ctx_type != 'cpu': + pool_op_args['cudnn_off'] = ctx_type == 'gpu' + if pool_op == 'pool': + # isolate pooling input from symbol input to test shared tensor optimizations + buffered_input = mx.sym.identity(name='pool') + sym = mx.sym.Pooling(buffered_input, **pool_op_args) + elif pool_op == 'pool_transposed': + ndim = len(data) + # NCW->NWC axes=(0,2,1) NCHW->NHWC axes=(0,2,3,1) NCDHW->NDHWC axes=(0,2,3,4,1); + axes = (0,) + tuple(range(2,ndim)) + (1,) + transposed = mx.sym.transpose(axes=axes, name='pool') + pooled = mx.sym.Pooling(data=transposed, layout=transposed_layout(ndim), + **pool_op_args) + # NWC->NCW axes=(0,2,1) NHWC->NCHW axes=(0,3,1,2) NDHWC->NCDHW axes=(0,4,1,2,3); + axes = (0, ndim-1) + tuple(range(1,ndim-1)) + sym = mx.sym.transpose(data=pooled, axes=axes, name='pool') else: - pool_op_args.update({'p_value' : p_value, 'count_include_pad' : count_include_pad}) - if ctx_type != 'cpu': - pool_op_args['cudnn_off'] = ctx_type == 'gpu' - if pool_op == 'pool': - # isolate pooling input from symbol input to test shared tensor optimizations - buffered_input = mx.sym.identity(name='pool') - sym = mx.sym.Pooling(buffered_input, **pool_op_args) - elif pool_op == 'pool_transposed': - ndim = len(data) - # NCW->NWC axes=(0,2,1) NCHW->NHWC axes=(0,2,3,1) NCDHW->NDHWC axes=(0,2,3,4,1); - axes = (0,) + tuple(range(2,ndim)) + (1,) - transposed = mx.sym.transpose(axes=axes, name='pool') - pooled = mx.sym.Pooling(data=transposed, layout=transposed_layout(ndim), - **pool_op_args) - # NWC->NCW axes=(0,2,1) NHWC->NCHW axes=(0,3,1,2) NDHWC->NCDHW axes=(0,4,1,2,3); - axes = (0, ndim-1) + tuple(range(1,ndim-1)) - sym = mx.sym.transpose(data=pooled, axes=axes, name='pool') - else: - raise RuntimeError('Expected one of {}, saw {}.'.format(expected_pool_ops, - pool_op)) + raise RuntimeError('Expected one of {}, saw {}.'.format(expected_pool_ops, + pool_op)) sym_list.append(sym) check_consistency(sym_list, ctx_list, equal_nan=(not count_include_pad), rtol=tol, atol=tol) @@ -1128,10 +1121,6 @@ def test_pooling_dim(dim, pool_type, dtype, pool_op_list, p_value=2, count_inclu std_pool_op_list = ['pool_cpu', 'pool_transposed_cpu', 'pool_gpu', 'pool_transposed_gpu', 'pool_cudnn', 'pool_transposed_cudnn'] - # The implementations of the 'v1' pooling operator - v1_pool_op_list = ['pool_v1_cpu', 'pool_v1_gpu'] - # For those cases when all implementations should match- the combined implementation list. - combo_pool_op_list = std_pool_op_list + v1_pool_op_list for dtype in [np.float32, np.float64, np.float16]: # Testing of the standard (not 'v1') pooling operator is universal across all @@ -1145,47 +1134,6 @@ def test_pooling_dim(dim, pool_type, dtype, pool_op_list, p_value=2, count_inclu test_pooling_dim(dim, 'lp', dtype, std_pool_op_list, p_value=2) test_pooling_dim(dim, 'lp', dtype, std_pool_op_list, p_value=3) - # Testing of the 'v1' pooling operator is over its restricted support domain of - # 2D data only and not with the 'lp' pooling type. The 'v1' cpu and gpu versions are - # always tested against each other, and sometimes against the standard operator versions. - # The slightly different 'v1' definition prevents this in the following cases: - # - # 1. In max pooling, when multiple input values are the maximum in the input window, - # the 'v1' implementation backprops the gradient to all maxima, whereas the standard - # pooling operator backprops the gradient to the lowest-indexed maximum only. - # 2. In max pooling, the 'v1' operator pads with 0's and this value can become the - # maximum output value in the case of an all-negative input. The standard pooling - # operator effectively considers the padding to be the largest negative value, so - # only input values should appear in the output. - # 3. In avg pooling, the 'v1' operator divides the sum by the same window size factor, - # even at the edges, and so does not support count_include_pad = False. - # 4. The float16 'v1' pooling operator performs forward sums and averages in - # float16, whereas the std operators perform those calculations in float32, so - # greater float16 tolerances are needed when comparing across implementations. - - # Double the float16 tol when comparing v1 and non-v1 implemenations, per note 4 above. - relaxed_tol = {np.dtype(np.float16): 2e-1, - np.dtype(np.float32): 1e-3, - np.dtype(np.float64): 1e-5, - np.dtype(np.uint8): 0, - np.dtype(np.int32): 0, - np.dtype(np.int64): 0} - - # Exclude std implementations due to points 1 and 2 above. - test_pooling_dim('2D', 'max', dtype, v1_pool_op_list) - # The standard and 'v1' implementations match for this case. - test_pooling_dim('2D', 'avg', dtype, combo_pool_op_list, count_include_pad=True, - tol=relaxed_tol) - # Exclude std implementations due to point 3 above. - test_pooling_dim('2D', 'avg', dtype, v1_pool_op_list, count_include_pad=False) - # The standard and 'v1' implementations match for this case. - test_pooling_dim('2D', 'sum', dtype, combo_pool_op_list, tol=relaxed_tol) - - # We can compare the standard and 'v1' max pooling implementations if we eliminate padding - # (see point 2 above) and use np.float64 data so that no two random input window values are - # likely to be the same (see point 1 above). - test_pooling_dim('2D_no_padding', 'max', np.float64, combo_pool_op_list) - @with_seed() def test_pooling_full_2d(): @@ -1318,19 +1266,6 @@ def test_2d_pooling(pool_type, p_value=2): pooling_convention = 'valid' - if pool_type != 'lp': - ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}}) - sym_list.append(mx.sym.Pooling_v1(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type, - pooling_convention=pooling_convention, global_pool=True, name='pool')) - - ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}}) - sym_list.append(mx.sym.Pooling_v1(kernel=kernel, pool_type=pool_type, - pooling_convention=pooling_convention, global_pool=True, name='pool')) - - ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}}) - sym_list.append(mx.sym.Pooling_v1(pool_type=pool_type, - pooling_convention=pooling_convention, global_pool=True, name='pool')) - ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}}) sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type, pooling_convention=pooling_convention, global_pool=True, p_value=p_value, name='pool')) diff --git a/tests/python/unittest/test_dlpack.py b/tests/python/unittest/test_dlpack.py deleted file mode 100644 index 46bdde7d0bcd..000000000000 --- a/tests/python/unittest/test_dlpack.py +++ /dev/null @@ -1,45 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import ctypes -import mxnet as mx -from mxnet.base import NDArrayHandle, _LIB, c_str, check_call -from mxnet.test_utils import assert_almost_equal - -def test_from_dlpack_backward_compatibility(): - def from_dlpack_old(dlpack): - - PyCapsuleDestructor = ctypes.CFUNCTYPE(None, ctypes.c_void_p) - _c_str_dltensor = c_str('dltensor') - _c_str_used_dltensor = c_str('used_dltensor') - handle = NDArrayHandle() - dlpack = ctypes.py_object(dlpack) - assert ctypes.pythonapi.PyCapsule_IsValid(dlpack, _c_str_dltensor), ValueError( - 'Invalid DLPack Tensor. DLTensor capsules can be consumed only once.') - dlpack_handle = ctypes.c_void_p(ctypes.pythonapi.PyCapsule_GetPointer(dlpack, _c_str_dltensor)) - check_call(_LIB.MXNDArrayFromDLPack(dlpack_handle, ctypes.byref(handle))) - # Rename PyCapsule (DLPack) - ctypes.pythonapi.PyCapsule_SetName(dlpack, _c_str_used_dltensor) - # delete the deleter of the old dlpack - ctypes.pythonapi.PyCapsule_SetDestructor(dlpack, None) - return mx.nd.NDArray(handle=handle) - - x = mx.nd.ones((2,3)) - y = mx.nd.to_dlpack_for_read(x) - z = from_dlpack_old(y) - assert_almost_equal(x.asnumpy(), z.asnumpy(), rtol=1e-5, atol=1e-5) - diff --git a/tests/python/unittest/test_gluon_data_vision.py b/tests/python/unittest/test_gluon_data_vision.py index 320b33d3e28a..eddd77152f5a 100644 --- a/tests/python/unittest/test_gluon_data_vision.py +++ b/tests/python/unittest/test_gluon_data_vision.py @@ -29,6 +29,7 @@ xfail_when_nonstandard_decimal_separator import numpy as np +import pytest @with_seed() def test_to_tensor(): @@ -381,17 +382,19 @@ def test_random_rotation(): def test_random_transforms(): from mxnet.gluon.data.vision import transforms - tmp_t = transforms.Compose([transforms.Resize(300), transforms.RandomResizedCrop(224)]) - transform = transforms.Compose([transforms.RandomApply(tmp_t, 0.5)]) + counter = 0 + def transform_fn(x): + nonlocal counter + counter += 1 + return x + transform = transforms.Compose([transforms.RandomApply(transform_fn, 0.5)]) - img = mx.nd.ones((10, 10, 3), dtype='uint8') - iteration = 1000 + img = mx.np.ones((10, 10, 3), dtype='uint8') + iteration = 10000 num_apply = 0 for _ in range(iteration): out = transform(img) - if out.shape[0] == 224: - num_apply += 1 - assert_almost_equal(num_apply/float(iteration), 0.5, 0.1) + assert counter == pytest.approx(5000, 1e-1) @xfail_when_nonstandard_decimal_separator @with_seed() diff --git a/tests/python/unittest/test_numpy_gluon_data_vision.py b/tests/python/unittest/test_numpy_gluon_data_vision.py index ec82052eff50..deb61de0b38a 100644 --- a/tests/python/unittest/test_numpy_gluon_data_vision.py +++ b/tests/python/unittest/test_numpy_gluon_data_vision.py @@ -32,6 +32,7 @@ from mxnet.base import MXNetError from mxnet.gluon.data.vision import transforms from mxnet import image +import pytest @with_seed() @use_np @@ -341,16 +342,19 @@ def test_random_transforms(): from mxnet.gluon.data.vision import transforms tmp_t = transforms.Compose([transforms.Resize(300), transforms.RandomResizedCrop(224)]) - transform = transforms.Compose([transforms.RandomApply(tmp_t, 0.5)]) + counter = 0 + def transform_fn(x): + nonlocal counter + counter += 1 + return x + transform = transforms.Compose([transforms.RandomApply(transform_fn, 0.5)]) img = mx.np.ones((10, 10, 3), dtype='uint8') - iteration = 1000 + iteration = 10000 num_apply = 0 for _ in range(iteration): out = transform(img) - if out.shape[0] == 224: - num_apply += 1 - assert_almost_equal(num_apply/float(iteration), 0.5, 0.1) + assert counter == pytest.approx(5000, 1e-1) @xfail_when_nonstandard_decimal_separator @with_seed()