[v1.8.x] Backport TRT test update #19296 (#19298)
* Bypass test_tensorrt.py:test_tensorrt_symbol_int8 on arch < 70

* Adapt test_tensorrt.py:test_tensorrt_symbol for A100

* Fix test_numpy_op.py:test_np_mixed_precision_binary_funcs with a portion of (#18660)
DickJC123 authored Oct 12, 2020
1 parent 2755c37 commit 4e4dfd2
Showing 2 changed files with 42 additions and 6 deletions.
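
The first file's change (shown below) replaces check_tensorrt_symbol's positional `tol` tuple with explicit `rtol`/`atol` keyword arguments and passes NDArrays straight to assert_almost_equal. A minimal sketch of that comparison style, not part of the commit; the arrays and the perturbation value are illustrative only:

```python
import mxnet as mx
from mxnet.test_utils import assert_almost_equal

# Stand-ins for the TensorRT output and the MXNet baseline (illustrative values only).
a = mx.nd.random.uniform(shape=(2, 3))
b = a + 1e-3

# Tolerances are now passed as keywords; leaving them as None falls back to the
# helper's defaults, which is what the updated FP32 test path does.
assert_almost_equal(a, b, rtol=1e-2, atol=1e-1)
```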
27 changes: 21 additions & 6 deletions tests/python/tensorrt/test_tensorrt.py
@@ -16,17 +16,23 @@
# under the License.

import os
import sys
import ctypes
import mxnet as mx
from mxnet.base import SymbolHandle, check_call, _LIB, mx_uint, c_str_array, c_str, mx_real_t
from mxnet.symbol import Symbol
import numpy as np
from mxnet.test_utils import assert_almost_equal
from mxnet.numpy_extension import get_cuda_compute_capability
from mxnet import gluon
from mxnet.gluon import nn
from mxnet import nd
from mxnet.gluon.model_zoo import vision

curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
sys.path.insert(0, os.path.join(curr_path, '../unittest'))
from common import setup_module, with_seed, teardown

####################################
######### FP32/FP16 tests ##########
####################################
@@ -60,7 +66,7 @@ def get_baseline(input_data):
return output


def check_tensorrt_symbol(baseline, input_data, fp16_mode, tol):
def check_tensorrt_symbol(baseline, input_data, fp16_mode, rtol=None, atol=None):
sym, arg_params, aux_params = get_model(batch_shape=input_data.shape)
trt_sym = sym.optimize_for('TensorRT', args=arg_params, aux=aux_params, ctx=mx.gpu(0),
precision='fp16' if fp16_mode else 'fp32')
@@ -69,17 +75,18 @@ def check_tensorrt_symbol(baseline, input_data, fp16_mode, tol):
grad_req='null', force_rebind=True)

output = executor.forward(is_train=False, data=input_data)
assert_almost_equal(output[0].asnumpy(), baseline[0].asnumpy(), atol=tol[0], rtol=tol[1])
assert_almost_equal(output[0], baseline[0], rtol=rtol, atol=atol)

@with_seed()
def test_tensorrt_symbol():
batch_shape = (32, 3, 224, 224)
input_data = mx.nd.random.uniform(shape=(batch_shape), ctx=mx.gpu(0))
baseline = get_baseline(input_data)
print("Testing resnet50 with TensorRT backend numerical accuracy...")
print("FP32")
check_tensorrt_symbol(baseline, input_data, fp16_mode=False, tol=(1e-4, 1e-4))
check_tensorrt_symbol(baseline, input_data, fp16_mode=False)
print("FP16")
check_tensorrt_symbol(baseline, input_data, fp16_mode=True, tol=(1e-1, 1e-2))
check_tensorrt_symbol(baseline, input_data, fp16_mode=True, rtol=1e-2, atol=1e-1)

##############################
######### INT8 tests ##########
@@ -135,17 +142,25 @@ def get_top1(logits):


def test_tensorrt_symbol_int8():
ctx = mx.gpu(0)
cuda_arch = get_cuda_compute_capability(ctx)
cuda_arch_min = 70
if cuda_arch < cuda_arch_min:
print('Bypassing test_tensorrt_symbol_int8 on cuda arch {}, need arch >= {}.'.format(
cuda_arch, cuda_arch_min))
return

# INT8 engine outputs are not lossless, so we don't expect numerical uniformity,
# but we have to compare the TOP1 metric

batch_shape=(1,3,224,224)
sym, arg_params, aux_params = get_model(batch_shape=batch_shape)
calibration_iters = 700
trt_sym = sym.optimize_for('TensorRT', args=arg_params, aux=aux_params, ctx=mx.gpu(0),
trt_sym = sym.optimize_for('TensorRT', args=arg_params, aux=aux_params, ctx=ctx,
precision='int8',
calibration_iters=calibration_iters)

executor = trt_sym.simple_bind(ctx=mx.gpu(), data=batch_shape,
executor = trt_sym.simple_bind(ctx=ctx, data=batch_shape,
grad_req='null', force_rebind=True)

dali_val_iter = get_dali_iter()
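
The guard added above skips the INT8 test on GPUs older than compute capability 70 (Volta). A minimal standalone sketch of the same pattern, not part of the commit; the helper name is hypothetical and a CUDA GPU visible to MXNet is assumed:

```python
import mxnet as mx
from mxnet.numpy_extension import get_cuda_compute_capability  # same helper the test imports

def too_old_for_test(min_arch, ctx=None):
    """Return True when the GPU's compute capability is below min_arch (e.g. 70 for Volta)."""
    ctx = ctx if ctx is not None else mx.gpu(0)
    cuda_arch = get_cuda_compute_capability(ctx)
    if cuda_arch < min_arch:
        print('Bypassing test on cuda arch {}, need arch >= {}.'.format(cuda_arch, min_arch))
        return True
    return False
```

A test body can then bail out early with `if too_old_for_test(70): return`, which is what the updated test does inline.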
21 changes: 21 additions & 0 deletions tests/python/unittest/test_numpy_op.py
@@ -2528,6 +2528,27 @@ def __init__(self, func):
def hybrid_forward(self, F, a, b, *args, **kwargs):
return getattr(F.np, self._func)(a, b)

if (func in ['multiply', 'mod', 'equal', 'not_equal', 'greater',
'greater_equal', 'less', 'less_equal']) and \
(lshape == () or rshape == ()) :
# type inference for inputs with shape '()' behaves differently in np (mxnet.numpy) and onp (official NumPy)
# for example,
# mx_test_x1 = np.random.uniform(-2, 2, (2,3)).astype(np.float32)
# mx_test_x2 = np.random.uniform(-2, 2, ()).astype(np.float16)
# np_out = _np.mod(mx_test_x1.asnumpy(), mx_test_x2.asnumpy()) # float16
# mx_out = np.mod(mx_test_x1, mx_test_x2) # float32

# logical ops: when two numbers differ only in precision, NumPy also has a weird behavior
# for example,
# a = np.array([[1.441]], dtype = np.float16)
# b = np.array(1.4413278, dtype = np.float32)
# c = np.array([1.4413278], dtype = np.float32)
# np.greater(a,b), np.greater(a,c) # True True
# _np.greater(a.asnumpy(),b.asnumpy()), _np.greater(a.asnumpy(),c.asnumpy()) # False True

# thus, skip the tests
return

np_func = getattr(_np, func)
mx_func = TestMixedBinary(func)
np_test_x1 = _np.random.uniform(low, high, lshape).astype(ltype)
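
The early return added above is motivated by dtype promotion differing between MXNet's NumPy module and official NumPy when one operand has shape (). A minimal sketch, not part of the commit, that reproduces the dtype comparison from the comments; it assumes `from mxnet import np, npx` works as in the test module, and the exact resulting dtypes depend on the MXNet and NumPy versions:

```python
import numpy as onp               # official NumPy (the test's `_np`)
from mxnet import np, npx         # MXNet's NumPy-compatible module (the test's `np`)

npx.set_np()  # enable MXNet's NumPy semantics

mx_x1 = np.random.uniform(-2, 2, (2, 3)).astype(onp.float32)
mx_x2 = np.random.uniform(-2, 2, ()).astype(onp.float16)   # the 0-d operand that triggers the skip

mx_out = np.mod(mx_x1, mx_x2)                               # MXNet's promotion rule
np_out = onp.mod(mx_x1.asnumpy(), mx_x2.asnumpy())          # official NumPy's promotion rule

# If the two dtypes disagree, an element-wise comparison of the results is not meaningful,
# which is why the test returns early for these shape/function combinations.
print(mx_out.dtype, np_out.dtype)
```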
