diff --git a/src/target/spirv/spirv_support.cc b/src/target/spirv/spirv_support.cc
index 0f1207f3e9a8..1ef56198df7f 100644
--- a/src/target/spirv/spirv_support.cc
+++ b/src/target/spirv/spirv_support.cc
@@ -72,6 +72,9 @@ SPIRVSupport::SPIRVSupport(tvm::Target target) {
   if (target->GetAttr<Bool>("supports_float16")) {
     supports_float16 = target->GetAttr<Bool>("supports_float16").value();
   }
+  if (target->GetAttr<Bool>("supports_float64")) {
+    supports_float64 = target->GetAttr<Bool>("supports_float64").value();
+  }
   if (target->GetAttr<Bool>("supports_int8")) {
     supports_int8 = target->GetAttr<Bool>("supports_int8").value();
   }
diff --git a/tests/python/topi/python/test_topi_conv1d_transpose_ncw.py b/tests/python/topi/python/test_topi_conv1d_transpose_ncw.py
index 81d3b3fd7f3f..93cfecf4239d 100644
--- a/tests/python/topi/python/test_topi_conv1d_transpose_ncw.py
+++ b/tests/python/topi/python/test_topi_conv1d_transpose_ncw.py
@@ -15,15 +15,20 @@
 # specific language governing permissions and limitations
 # under the License.
 """Test code for transposed convolution."""
-import numpy as np
+
 import itertools
+import os
+import sys
+
+import numpy as np
+import pytest
+
 import tvm
-from tvm import te
-from tvm import topi
+import tvm.testing
 import tvm.topi.testing
-from tvm.contrib.pickle_memoize import memoize
+
+from tvm import te, topi
 from tvm.topi.utils import get_const_tuple
-import tvm.testing
 
 _conv1d_transpose_ncw_implement = {
     "generic": (topi.nn.conv1d_transpose_ncw, topi.generic.schedule_conv1d_transpose_ncw),
@@ -31,74 +36,88 @@
 }
 
 
-def verify_conv1d_transpose_ncw(
-    batch, in_channel, in_size, num_filter, kernel, stride, padding, output_padding
+(
+    batch,
+    in_channel,
+    in_size,
+    num_filter,
+    kernel,
+    stride,
+    padding,
+    output_padding,
+) = tvm.testing.parameters(
+    (1, 3, 224, 32, 5, 1, 0, (0,)),
+    (1, 3, 224, 32, 7, 1, 2, (0,)),
+    (1, 3, 224, 32, 5, 2, 1, (0,)),
+    (1, 3, 224, 32, 5, 2, 1, (1,)),
+    (1, 3, 224, 32, 5, 2, 0, (0,)),
+    (1, 32, 32, 128, 5, 1, 0, (0,)),
+    (1, 32, 32, 128, 5, 2, 1, (0,)),
+    (1, 1, 1024, 1, 512, 1, 256, (0,)),
+    (1, 1, 1024, 1, 512, 2, 256, (0,)),
+    (1, 1, 1024, 1, 512, 5, 256, (0,)),
+    (1, 1, 1024, 1, 512, 5, 256, (3,)),
+    (1, 2, 1024, 1, 128, 128, 0, (0,)),
+    (1, 1, 1024, 2, 128, 128, 0, (0,)),
+    (1, 1, 1024, 2, 2, 2, 0, (0,)),
+    (1, 1, 10, 1, 5, 1, (0, 3), (0,)),
+    (1, 1, 10, 1, 5, 1, (1, 3), (0,)),
+    (1, 1, 10, 1, 5, 1, (2, 3), (0,)),
+    (1, 257, 128, 1, 512, 128, 256, (0,)),
+)
+
+dtype = tvm.testing.parameter("float32")
+
+
+@tvm.testing.fixture(cache_return_value=True)
+def ref_data(
+    dtype, batch, in_channel, in_size, num_filter, kernel, stride, padding, output_padding
+):
+    dtype = "float32"
+    a_shape = (batch, in_channel, in_size)
+    w_shape = (in_channel, num_filter, kernel)
+
+    a_np = np.random.uniform(size=a_shape).astype(dtype)
+    w_np = np.random.uniform(size=w_shape).astype(dtype)
+    b_np = tvm.topi.testing.conv1d_transpose_ncw_python(a_np, w_np, stride, padding, output_padding)
+    c_np = np.maximum(b_np, 0)
+    return a_np, w_np, b_np, c_np
+
+
+@tvm.testing.known_failing_targets("vulkan")
+def test_conv1d_transpose_ncw(
+    target,
+    dev,
+    ref_data,
+    dtype,
+    stride,
+    padding,
+    output_padding,
 ):
-    in_width = in_size
-    A = te.placeholder((batch, in_channel, in_width), name="A")
-    W = te.placeholder((in_channel, num_filter, kernel), name="W")
-
-    a_shape = get_const_tuple(A.shape)
-    w_shape = get_const_tuple(W.shape)
-    dtype = A.dtype
-
-    @memoize("topi.tests.test_topi_conv1d_transpose.verify_conv1d_transpose_ncw")
-    def get_ref_data():
-        a_np = np.random.uniform(size=a_shape).astype(dtype)
-        w_np =
np.random.uniform(size=w_shape).astype(dtype) - b_np = tvm.topi.testing.conv1d_transpose_ncw_python( - a_np, w_np, stride, padding, output_padding - ) - c_np = np.maximum(b_np, 0) - return a_np, w_np, b_np, c_np - - a_np, w_np, b_np, c_np = get_ref_data() - - def check_target(target, dev): - dev = tvm.device(target, 0) - with tvm.target.Target(target): - fcompute, fschedule = tvm.topi.testing.dispatch(target, _conv1d_transpose_ncw_implement) - B = fcompute(A, W, stride, padding, A.dtype, output_padding) - C = topi.nn.relu(B) - s1 = fschedule([B]) - s2 = fschedule([C]) - a = tvm.nd.array(a_np, dev) - w = tvm.nd.array(w_np, dev) - b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), dev) - c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), dev) - - func1 = tvm.build(s1, [A, W, B], target) - func2 = tvm.build(s2, [A, W, C], target) - func1(a, w, b) - func2(a, w, c) - tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5) - tvm.testing.assert_allclose(c.numpy(), c_np, rtol=1e-5) - - for target, dev in tvm.testing.enabled_targets(): - check_target(target, dev) - - -@tvm.testing.uses_gpu -def test_conv1d_transpose_ncw(): - verify_conv1d_transpose_ncw(1, 3, 224, 32, 5, 1, 0, (0,)) - verify_conv1d_transpose_ncw(1, 3, 224, 32, 7, 1, 2, (0,)) - verify_conv1d_transpose_ncw(1, 3, 224, 32, 5, 2, 1, (0,)) - verify_conv1d_transpose_ncw(1, 3, 224, 32, 5, 2, 1, (1,)) - verify_conv1d_transpose_ncw(1, 3, 224, 32, 5, 2, 0, (0,)) - verify_conv1d_transpose_ncw(1, 32, 32, 128, 5, 1, 0, (0,)) - verify_conv1d_transpose_ncw(1, 32, 32, 128, 5, 2, 1, (0,)) - verify_conv1d_transpose_ncw(1, 1, 1024, 1, 512, 1, 256, (0,)) - verify_conv1d_transpose_ncw(1, 1, 1024, 1, 512, 2, 256, (0,)) - verify_conv1d_transpose_ncw(1, 1, 1024, 1, 512, 5, 256, (0,)) - verify_conv1d_transpose_ncw(1, 1, 1024, 1, 512, 5, 256, (3,)) - verify_conv1d_transpose_ncw(1, 2, 1024, 1, 128, 128, 0, (0,)) - verify_conv1d_transpose_ncw(1, 1, 1024, 2, 128, 128, 0, (0,)) - verify_conv1d_transpose_ncw(1, 1, 1024, 2, 2, 2, 0, (0,)) - verify_conv1d_transpose_ncw(1, 1, 10, 1, 5, 1, (0, 3), (0,)) - verify_conv1d_transpose_ncw(1, 1, 10, 1, 5, 1, (1, 3), (0,)) - verify_conv1d_transpose_ncw(1, 1, 10, 1, 5, 1, (2, 3), (0,)) - verify_conv1d_transpose_ncw(1, 257, 128, 1, 512, 128, 256, (0,)) + + a_np, w_np, b_np, c_np = ref_data + + A = te.placeholder(a_np.shape, name="A", dtype=dtype) + W = te.placeholder(w_np.shape, name="W", dtype=dtype) + + with tvm.target.Target(target): + fcompute, fschedule = tvm.topi.testing.dispatch(target, _conv1d_transpose_ncw_implement) + B = fcompute(A, W, stride, padding, A.dtype, output_padding) + C = topi.nn.relu(B) + s1 = fschedule([B]) + s2 = fschedule([C]) + a = tvm.nd.array(a_np, dev) + w = tvm.nd.array(w_np, dev) + b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), dev) + c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), dev) + + func1 = tvm.build(s1, [A, W, B], target) + func2 = tvm.build(s2, [A, W, C], target) + func1(a, w, b) + func2(a, w, c) + tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5) + tvm.testing.assert_allclose(c.numpy(), c_np, rtol=1e-5) if __name__ == "__main__": - test_conv1d_transpose_ncw() + sys.exit(pytest.main(sys.argv)) diff --git a/tests/python/topi/python/test_topi_conv2d_nhwc.py b/tests/python/topi/python/test_topi_conv2d_nhwc.py index eb4c5a343b58..96359860f569 100644 --- a/tests/python/topi/python/test_topi_conv2d_nhwc.py +++ b/tests/python/topi/python/test_topi_conv2d_nhwc.py @@ -27,8 +27,8 @@ _conv2d_nhwc_implement = { - 
"llvm": (topi.nn.conv2d_nhwc, topi.generic.schedule_conv2d_nhwc), - "cuda": (topi.cuda.conv2d_nhwc, topi.cuda.schedule_conv2d_nhwc), + "generic": (topi.nn.conv2d_nhwc, topi.generic.schedule_conv2d_nhwc), + "gpu": (topi.cuda.conv2d_nhwc, topi.cuda.schedule_conv2d_nhwc), "cpu": (topi.nn.conv2d_nhwc, topi.x86.schedule_conv2d_nhwc), "arm_cpu": ( topi.arm_cpu.conv2d_nhwc_spatial_pack, @@ -45,61 +45,55 @@ "hls": (topi.nn.conv2d_nhwc, topi.hls.schedule_conv2d_nhwc), } +dtype = tvm.testing.parameter("float32") -def verify_conv2d_nhwc(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1): - in_height = in_width = in_size - - A = te.placeholder((batch, in_height, in_width, in_channel), name="A") - W = te.placeholder((kernel, kernel, in_channel, num_filter), name="W") +batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation = tvm.testing.parameters( + (1, 256, 32, 256, 3, 1, "SAME", 1), + (4, 128, 16, 128, 5, 2, "SAME", 1), + (4, 128, 16, 256, 5, 2, "SAME", 1), + (1, 256, 32, 256, 3, 1, "VALID", 1), + (1, 256, 32, 256, 3, 1, "VALID", 1), + (4, 128, 16, 128, 5, 2, "VALID", 1), + (4, 128, 16, 256, 5, 2, "VALID", 1), + (1, 128, 16, 256, 3, 2, (0, 0, 1, 1), 1), + (1, 128, 16, 256, 3, 2, (1, 1, 2, 2), 1), + (1, 128, 16, 128, 5, 2, (3, 3, 2, 2), 1), + (1, 128, 16, 256, 3, 2, (0, 1, 2, 3), 1), + (1, 256, 32, 256, 3, 1, "SAME", 2), + (1, 256, 32, 256, 3, 1, (1, 1, 2, 2), 2), +) - a_shape = get_const_tuple(A.shape) - w_shape = get_const_tuple(W.shape) - dtype = A.dtype - @memoize("topi.tests.test_topi_conv2d_nhwc.verify_nhwc.v2") - def get_ref_data(): - a_np = np.random.uniform(size=a_shape).astype(dtype) - w_np = np.random.uniform(size=w_shape).astype(dtype) - dw_np = tvm.topi.testing.dilate_python(w_np, (dilation, dilation, 1, 1)) - b_np = tvm.topi.testing.conv2d_nhwc_python(a_np, dw_np, stride, padding) - return a_np, w_np, b_np +@tvm.testing.fixture(cache_return_value=True) +def ref_data(dtype, batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation): + in_height = in_width = in_size + a_shape = (batch, in_height, in_width, in_channel) + w_shape = (kernel, kernel, in_channel, num_filter) - a_np, w_np, b_np = get_ref_data() + a_np = np.random.uniform(size=a_shape).astype(dtype) + w_np = np.random.uniform(size=w_shape).astype(dtype) + dw_np = tvm.topi.testing.dilate_python(w_np, (dilation, dilation, 1, 1)) + b_np = tvm.topi.testing.conv2d_nhwc_python(a_np, dw_np, stride, padding) + return a_np, w_np, b_np - def check_device(target, dev): - print("Running on target: %s" % target) - with tvm.target.Target(target): - fcompute, fschedule = tvm.topi.testing.dispatch(target, _conv2d_nhwc_implement) - B = fcompute(A, W, stride, padding, dilation, dtype) - s = fschedule([B]) - a = tvm.nd.array(a_np, dev) - w = tvm.nd.array(w_np, dev) - b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), dev) - func = tvm.build(s, [A, W, B], target) - func(a, w, b) - tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5) - for target, dev in tvm.testing.enabled_targets(): - check_device(target, dev) +def test_conv2d_nhwc(target, dev, ref_data, dtype, stride, padding, dilation): + a_np, w_np, b_np = ref_data + A = te.placeholder(a_np.shape, name="A", dtype=dtype) + W = te.placeholder(w_np.shape, name="W", dtype=dtype) -@tvm.testing.uses_gpu -def test_conv2d_nhwc(): - verify_conv2d_nhwc(1, 256, 32, 256, 3, 1, "SAME") - verify_conv2d_nhwc(4, 128, 16, 128, 5, 2, "SAME") - verify_conv2d_nhwc(4, 128, 16, 256, 5, 2, "SAME") - verify_conv2d_nhwc(1, 256, 32, 256, 3, 
1, "VALID") - verify_conv2d_nhwc(1, 256, 32, 256, 3, 1, "VALID") - verify_conv2d_nhwc(4, 128, 16, 128, 5, 2, "VALID") - verify_conv2d_nhwc(4, 128, 16, 256, 5, 2, "VALID") - verify_conv2d_nhwc(1, 128, 16, 256, 3, 2, (0, 0, 1, 1)) - verify_conv2d_nhwc(1, 128, 16, 256, 3, 2, (1, 1, 2, 2)) - verify_conv2d_nhwc(1, 128, 16, 128, 5, 2, (3, 3, 2, 2)) - verify_conv2d_nhwc(1, 128, 16, 256, 3, 2, (0, 1, 2, 3)) - # dilation = 2 - verify_conv2d_nhwc(1, 256, 32, 256, 3, 1, "SAME", dilation=2) - verify_conv2d_nhwc(1, 256, 32, 256, 3, 1, (1, 1, 2, 2), dilation=2) + with tvm.target.Target(target): + fcompute, fschedule = tvm.topi.testing.dispatch(target, _conv2d_nhwc_implement) + B = fcompute(A, W, stride, padding, dilation, dtype) + s = fschedule([B]) + a = tvm.nd.array(a_np, dev) + w = tvm.nd.array(w_np, dev) + b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), dev) + func = tvm.build(s, [A, W, B], target) + func(a, w, b) + tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5) if __name__ == "__main__": - test_conv2d_nhwc() + sys.exit(pytest.main(sys.argv)) diff --git a/tests/python/topi/python/test_topi_correlation.py b/tests/python/topi/python/test_topi_correlation.py index e6323065d9be..3dff54dfa694 100644 --- a/tests/python/topi/python/test_topi_correlation.py +++ b/tests/python/topi/python/test_topi_correlation.py @@ -15,125 +15,82 @@ # specific language governing permissions and limitations # under the License """test of correlation operator in NCHW layout""" +import sys + import numpy as np +import pytest + import tvm -from tvm import te -from tvm import autotvm -from tvm import topi -import tvm.testing import tvm.topi.testing -from tvm.contrib.pickle_memoize import memoize -from tvm.topi.utils import get_const_tuple + +from tvm import autotvm, te, topi _correlation_implement = { "generic": (topi.nn.correlation_nchw, topi.generic.schedule_correlation_nchw), - "cuda": (topi.cuda.correlation_nchw, topi.cuda.schedule_correlation_nchw), + "gpu": (topi.cuda.correlation_nchw, topi.cuda.schedule_correlation_nchw), } +( + data_shape, + kernel_size, + max_displacement, + stride1, + stride2, + pad_size, + is_multiply, +) = tvm.testing.parameters( + ((1, 3, 10, 10), 1, 4, 1, 1, 4, True), + ((1, 3, 10, 10), 1, 5, 1, 1, 5, True), + ((5, 1, 4, 4), 3, 1, 2, 1, 2, True), + ((5, 1, 6, 4), 3, 1, 2, 2, 2, False), + ((5, 1, 11, 11), 5, 1, 1, 1, 2, False), +) + +dtype = tvm.testing.parameter("float32") -def verify_correlation_nchw( - data_shape, kernel_size, max_displacement, stride1, stride2, pad_size, is_multiply + +@tvm.testing.fixture(cache_return_value=True) +def ref_data( + dtype, data_shape, kernel_size, max_displacement, stride1, stride2, pad_size, is_multiply ): - print( - "Workload: (%d, %d, %d, %d, %d, %d, %d, %d, %d, %d)" - % ( - data_shape[0], - data_shape[1], - data_shape[2], - data_shape[3], - kernel_size, - max_displacement, - stride1, - stride2, - pad_size, - is_multiply, - ) + a_np = np.random.uniform(size=data_shape).astype(dtype) + b_np = np.random.uniform(size=data_shape).astype(dtype) + c_np = tvm.topi.testing.correlation_nchw_python( + a_np, b_np, kernel_size, max_displacement, stride1, stride2, pad_size, is_multiply ) + return a_np, b_np, c_np - A = te.placeholder(data_shape, name="data1") - B = te.placeholder(data_shape, name="data2") - dtype = A.dtype - - @memoize("topi.tests.test_topi_correlation_nchw.verify_correlation_nchw") - def get_ref_data(): - a_np = np.random.uniform(size=data_shape).astype(dtype) - b_np = np.random.uniform(size=data_shape).astype(dtype) - c_np = 
tvm.topi.testing.correlation_nchw_python( - a_np, b_np, kernel_size, max_displacement, stride1, stride2, pad_size, is_multiply - ) - return a_np, b_np, c_np - - a_np, b_np, c_np = get_ref_data() - - def check_device(target, dev): - print("Running on target: %s" % target) - fcompute, fschedule = tvm.topi.testing.dispatch(target, _correlation_implement) - with tvm.target.Target(target): - C = fcompute( - A, B, kernel_size, max_displacement, stride1, stride2, pad_size, is_multiply - ) - s = fschedule([C]) - - a = tvm.nd.array(a_np, dev) - b = tvm.nd.array(b_np, dev) - c = tvm.nd.empty(c_np.shape, dtype=dtype, device=dev) - - func = tvm.build(s, [A, B, C], target) - func(a, b, c) - tvm.testing.assert_allclose(c.numpy(), c_np, rtol=1e-5) - - for target, dev in tvm.testing.enabled_targets(): - check_device(target, dev) - - -@tvm.testing.uses_gpu -def test_correlation_nchw(): - verify_correlation_nchw( - (1, 3, 10, 10), - kernel_size=1, - max_displacement=4, - stride1=1, - stride2=1, - pad_size=4, - is_multiply=True, - ) - verify_correlation_nchw( - (1, 3, 10, 10), - kernel_size=1, - max_displacement=5, - stride1=1, - stride2=1, - pad_size=5, - is_multiply=True, - ) - verify_correlation_nchw( - (5, 1, 4, 4), - kernel_size=3, - max_displacement=1, - stride1=2, - stride2=1, - pad_size=2, - is_multiply=True, - ) - verify_correlation_nchw( - (5, 1, 6, 4), - kernel_size=3, - max_displacement=1, - stride1=2, - stride2=2, - pad_size=2, - is_multiply=False, - ) - verify_correlation_nchw( - (5, 1, 11, 11), - kernel_size=5, - max_displacement=1, - stride1=1, - stride2=1, - pad_size=2, - is_multiply=False, - ) + +def test_correlation_nchw( + target, + dev, + ref_data, + dtype, + kernel_size, + max_displacement, + stride1, + stride2, + pad_size, + is_multiply, +): + a_np, b_np, c_np = ref_data + + A = te.placeholder(a_np.shape, name="data1", dtype=dtype) + B = te.placeholder(b_np.shape, name="data2", dtype=dtype) + + fcompute, fschedule = tvm.topi.testing.dispatch(target, _correlation_implement) + with tvm.target.Target(target): + C = fcompute(A, B, kernel_size, max_displacement, stride1, stride2, pad_size, is_multiply) + s = fschedule([C]) + + a = tvm.nd.array(a_np, dev) + b = tvm.nd.array(b_np, dev) + c = tvm.nd.empty(c_np.shape, dtype=dtype, device=dev) + + func = tvm.build(s, [A, B, C], target) + func(a, b, c) + tvm.testing.assert_allclose(c.numpy(), c_np, rtol=1e-5) if __name__ == "__main__": - test_correlation_nchw() + sys.exit(pytest.main(sys.argv)) diff --git a/tests/python/topi/python/test_topi_loss.py b/tests/python/topi/python/test_topi_loss.py index bb7655b192f5..c1b61e5b49cd 100644 --- a/tests/python/topi/python/test_topi_loss.py +++ b/tests/python/topi/python/test_topi_loss.py @@ -25,9 +25,17 @@ import tvm.testing -def verify_nll_loss( - dev, target, prediction_shape, reduction="mean", ignore_index=-100, dtype="float32" -): +prediction_shape, reduction, ignore_index, dtype = tvm.testing.parameters( + ((10, 5), "mean", -100, "float32"), + ((10, 5, 2, 2), "mean", -100, "float32"), + ((10, 5), "sum", -100, "float32"), + ((10, 5), "none", -100, "float32"), + ((10, 5), "mean", 3, "float32"), + ((10, 5), "mean", -100, "float64"), +) + + +def test_nll_loss(target, dev, prediction_shape, reduction, ignore_index, dtype): C = prediction_shape[1] target_shape = prediction_shape[:1] + prediction_shape[2:] predictions = te.placeholder(shape=prediction_shape, name="predictions", dtype=dtype) @@ -56,15 +64,5 @@ def verify_nll_loss( tvm.testing.assert_allclose(out_topi, out_npy, rtol=1e-4, atol=1e-5) 
-@tvm.testing.parametrize_targets -def test_nll_loss(dev, target): - verify_nll_loss(dev, target, (10, 5)) - verify_nll_loss(dev, target, (10, 5, 2, 2)) - verify_nll_loss(dev, target, (10, 5), reduction="sum") - verify_nll_loss(dev, target, (10, 5), reduction="none") - verify_nll_loss(dev, target, (10, 5), ignore_index=3) - verify_nll_loss(dev, target, (10, 5), dtype="float64") - - if __name__ == "__main__": - test_nll_loss(tvm.device("cpu"), tvm.target.Target("llvm")) + sys.exit(pytest.main(sys.argv)) diff --git a/tests/python/topi/python/test_topi_math.py b/tests/python/topi/python/test_topi_math.py index c7f80033bdf3..5ee049fa379a 100644 --- a/tests/python/topi/python/test_topi_math.py +++ b/tests/python/topi/python/test_topi_math.py @@ -14,14 +14,19 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. + +import sys + import numpy as np +import pytest import scipy from scipy import special + import tvm -from tvm import te -from tvm import topi import tvm.testing import tvm.topi.testing + +from tvm import te, topi from tvm.topi import utils @@ -31,211 +36,205 @@ def test_util(): assert utils.get_const_tuple((x, x)) == (100, 100) -@tvm.testing.uses_gpu -def test_ewise(): - def test_apply( - func, - name, - f_numpy, - low, - high, - shape=(20, 3), - dtype="float32", - check_round=False, - skip_name_check=False, - ): - m = te.var("m") - l = te.var("l") - A = te.placeholder((m, l), dtype=dtype, name="A") - - B = func(A) - assert tuple(B.shape) == tuple(A.shape) - if not skip_name_check: - assert B.op.body[0].op.name == "tir." + name - a_np = np.random.uniform(low=low, high=high, size=shape).astype(A.dtype) * 10 - # avoid round check too close to boundary - if check_round: - a_np += ((np.abs(np.fmod(a_np, 1)) - 0.5) < 1e-6) * 1e-4 - b_np = f_numpy(a_np) - - def check_target(target, dev): - print("Running on target: %s" % target) - with tvm.target.Target(target): - s = tvm.topi.testing.get_injective_schedule(target)(B) - foo = tvm.build(s, [A, B], target, name=name) - a = tvm.nd.array(a_np, dev) - b = tvm.nd.array(np.zeros_like(b_np), dev) - foo(a, b) - tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5, atol=1e-5) - - for target, dev in tvm.testing.enabled_targets(): - check_target(target, dev) - - def test_isnan( - low, - high, - shape=(20, 3), - dtype="float32", - check_round=False, - skip_name_check=False, - ): - m = te.var("m") - l = te.var("l") - A = te.placeholder((m, l), dtype=dtype, name="A") - - B = topi.isnan(A) - assert tuple(B.shape) == tuple(A.shape) - if not skip_name_check: - assert B.op.body[0].op.name == "tir.isnan" - a_np = np.random.uniform(low=low, high=high, size=shape).astype(A.dtype) * 10 - a_np.ravel()[np.random.choice(a_np.size, int(a_np.size * 0.5), replace=False)] = np.nan - # avoid round check too close to boundary - if check_round: - a_np += ((np.abs(np.fmod(a_np, 1)) - 0.5) < 1e-6) * 1e-5 - b_np = np.isnan(a_np) - - def check_target(target, dev): - print("Running on target: %s" % target) - with tvm.target.Target(target): - s = tvm.topi.testing.get_injective_schedule(target)(B) - foo = tvm.build(s, [A, B], target, name="isnan") - a = tvm.nd.array(a_np, dev) - b = tvm.nd.array(np.zeros_like(b_np), dev) - foo(a, b) - tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5, atol=1e-5) - - for target, dev in tvm.testing.enabled_targets(): - check_target(target, dev) - - def test_infiniteness_ops(topi_op, ref_op, name): - for dtype in ["float32", "float64", "int32", "int16"]: - m 
= te.var("m") - l = te.var("l") - A = te.placeholder((m, l), dtype=dtype, name="A") - B = topi_op(A) - assert tuple(B.shape) == tuple(A.shape) - - a_np = np.random.uniform(size=(8, 8)).astype(A.dtype) * 10 - if dtype.startswith("float"): - a_np.ravel()[ - np.random.choice(a_np.size, int(a_np.size * 0.5), replace=False) - ] = np.infty - a_np.ravel()[ - np.random.choice(a_np.size, int(a_np.size * 0.5), replace=False) - ] = np.nan - b_np = ref_op(a_np) - - def check_target(target, dev): - with tvm.target.Target(target): - s = tvm.topi.testing.get_injective_schedule(target)(B) - foo = tvm.build(s, [A, B], target, name=name) - a = tvm.nd.array(a_np, dev) - b = tvm.nd.array(np.zeros_like(b_np), dev) - foo(a, b) - tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5, atol=1e-5) - - for target, dev in tvm.testing.enabled_targets(): - check_target(target, dev) - - test_apply(topi.floor, "floor", np.floor, -100, 100) - test_apply(topi.ceil, "ceil", np.ceil, -100, 100) - test_apply(topi.sign, "sign", np.sign, -100, 100, skip_name_check=True) - test_apply(topi.trunc, "trunc", np.trunc, -100, 100) - test_apply(topi.abs, "fabs", np.abs, -100, 100) - test_apply(topi.round, "round", np.round, -100, 100, check_round=True) - test_apply(topi.exp, "exp", np.exp, -1, 1) - test_apply(topi.tanh, "tanh", np.tanh, -10, 10, shape=(128, 128)) - test_apply(topi.tanh, "tanh", np.tanh, -10, 10, shape=(128, 128), dtype="float64") - test_apply(topi.sigmoid, "sigmoid", lambda x: 1 / (1 + np.exp(-x)), -1, 1) - test_apply(topi.log, "log", np.log, 0, 100) - test_apply(topi.sqrt, "sqrt", np.sqrt, 0, 100) - test_apply( - topi.rsqrt, "rsqrt", lambda x: np.ones_like(x) / np.sqrt(x), 0, 100, skip_name_check=True - ) - test_apply(topi.cos, "cos", np.cos, -2.0 * np.pi, 2.0 * np.pi) - test_apply(topi.tan, "tan", np.tan, -2.0 * np.pi, 2.0 * np.pi, dtype="float32") - test_apply(topi.tan, "tan", np.tan, -2.0 * np.pi, 2.0 * np.pi, dtype="float64") - test_apply(topi.sin, "sin", np.sin, -2.0 * np.pi, 2.0 * np.pi) - test_apply(topi.erf, "erf", scipy.special.erf, -0.1, 0.1, dtype="float32") - test_isnan(-100, 100) - test_infiniteness_ops(topi.isfinite, np.isfinite, "isifinite") - test_infiniteness_ops(topi.isinf, np.isinf, "isinf") - - -@tvm.testing.uses_gpu -def test_cast(): - def verify(from_dtype, to_dtype, low=-100, high=100): - shape = (5, 4) - A = te.placeholder(shape, dtype=from_dtype, name="A") - B = topi.cast(A, to_dtype) - - if from_dtype == "bool": - a_np = np.random.choice([True, False], size=shape) - else: - a_np = np.random.uniform(low, high, size=shape).astype(from_dtype) - if to_dtype == "bool": - a_np = a_np - a_np[2, 3] - b_np = a_np.astype(to_dtype) - - for target, dev in tvm.testing.enabled_targets(): - print("Running on target: %s" % target) - with tvm.target.Target(target): - s = tvm.topi.testing.get_injective_schedule(target)(B) - foo = tvm.build(s, [A, B], target) - a = tvm.nd.array(a_np, dev) - b = tvm.nd.empty(shape=shape, dtype=to_dtype, device=dev) - foo(a, b) - tvm.testing.assert_allclose(b.numpy(), b_np) - - verify("int32", "float32") - verify("int32", "float64") - verify("int32", "bool") - verify("float32", "int32") - verify("float32", "float64") - verify("float32", "bool") - verify("bool", "float32") - verify("bool", "int32") - - -def test_fastmath(): - def test_apply(func, name, f_numpy, low, high, step, dtype="float32"): - a_np = np.arange(low, high, step).astype(dtype).reshape((1, -1)) - b_np = f_numpy(a_np) - A = te.placeholder(a_np.shape, dtype=dtype, name="A") - B = func(A) - assert tuple(B.shape) == 
tuple(A.shape)
-
-        def check_target(target):
-            dev = tvm.device(target, 0)
-            if not tvm.testing.device_enabled(target):
-                print("Skip because %s is not enabled" % target)
-                return
-            with tvm.target.Target(target):
-                s = topi.generic.schedule_injective(B)
-                func = tvm.build(s, [A, B], target, name=name)
-            a = tvm.nd.array(a_np, dev)
-            b = tvm.nd.array(np.zeros_like(b_np), dev)
-            func(a, b)
-            tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5, atol=1e-5)
-
-        check_target("llvm")
-        check_target("llvm -device=arm-cpu")
-
-    test_apply(topi.fast_exp, "fast_exp", np.exp, low=-88, high=88, step=0.01)
-    test_apply(topi.fast_erf, "fast_erf", scipy.special.erf, low=-10, high=10, step=0.01)
-    test_apply(topi.fast_tanh, "fast_tanh", np.tanh, low=-10, high=10, step=0.01)
-    test_apply(
-        topi.nn.fast_softmax,
-        "fast_softmax",
-        tvm.topi.testing.softmax_python,
-        low=-10,
-        high=10,
-        step=0.01,
-    )
+ewise_operations = {
+    "floor": {"topi": topi.floor, "ref": np.floor, "input_range": (-100, 100)},
+    "ceil": {"topi": topi.ceil, "ref": np.ceil, "input_range": (-100, 100)},
+    "sign": {
+        "topi": topi.sign,
+        "ref": np.sign,
+        "input_range": (-100, 100),
+        "skip_name_check": True,
+    },
+    "trunc": {"topi": topi.trunc, "ref": np.trunc, "input_range": (-100, 100)},
+    "fabs": {"topi": topi.abs, "ref": np.fabs, "input_range": (-100, 100)},
+    "round": {"topi": topi.round, "ref": np.round, "input_range": (-100, 100), "check_round": True},
+    "exp": {"topi": topi.exp, "ref": np.exp, "input_range": (-1, 1)},
+    "tanh": {
+        "topi": topi.tanh,
+        "ref": np.tanh,
+        "input_range": (-10, 10),
+        "shape": (128, 128),
+        "dtypes": ["float32", "float64"],
+    },
+    "sigmoid": {
+        "topi": topi.sigmoid,
+        "ref": lambda x: 1 / (1 + np.exp(-x)),
+        "input_range": (-1, 1),
+    },
+    "log": {"topi": topi.log, "ref": np.log, "input_range": (0, 100)},
+    "sqrt": {"topi": topi.sqrt, "ref": np.sqrt, "input_range": (0, 100)},
+    "rsqrt": {
+        "topi": topi.rsqrt,
+        "ref": lambda x: np.ones_like(x) / np.sqrt(x),
+        "input_range": (0, 100),
+        "skip_name_check": True,
+    },
+    "cos": {"topi": topi.cos, "ref": np.cos, "input_range": (-2.0 * np.pi, 2.0 * np.pi)},
+    "tan": {
+        "topi": topi.tan,
+        "ref": np.tan,
+        "input_range": (-2.0 * np.pi, 2.0 * np.pi),
+        "dtypes": ["float32", "float64"],
+    },
+    "sin": {"topi": topi.sin, "ref": np.sin, "input_range": (-2.0 * np.pi, 2.0 * np.pi)},
+    "erf": {"topi": topi.erf, "ref": scipy.special.erf, "input_range": (-0.1, 0.1)},
+    "isnan": {
+        "topi": topi.isnan,
+        "ref": np.isnan,
+        "input_range": (-1, 1),
+        "replace_with_nan": True,
+    },
+    "isfinite": {
+        "topi": topi.isfinite,
+        "ref": np.isfinite,
+        "input_range": (0, 1),
+        "shape": (8, 8),
+        "skip_name_check": True,
+        "replace_with_nan": True,
+        "replace_with_inf": True,
+        "dtypes": ["float32", "float64", "int32", "int16"],
+    },
+    "isinf": {
+        "topi": topi.isinf,
+        "ref": np.isinf,
+        "input_range": (0, 1),
+        "shape": (8, 8),
+        "skip_name_check": True,
+        "replace_with_nan": True,
+        "replace_with_inf": True,
+        "dtypes": ["float32", "float64", "int32", "int16"],
+    },
+    "fast_exp": {
+        "topi": topi.fast_exp,
+        "ref": np.exp,
+        "skip_name_check": True,
+        "input_range": (-88, 88),
+        "step": 0.01,
+    },
+    "fast_erf": {
+        "topi": topi.fast_erf,
+        "ref": scipy.special.erf,
+        "skip_name_check": True,
+        "input_range": (-10, 10),
+        "step": 0.01,
+    },
+    "fast_tanh": {
+        "topi": topi.fast_tanh,
+        "ref": np.tanh,
+        "skip_name_check": True,
+        "input_range": (-10, 10),
+        "step": 0.01,
+    },
+}
+
+topi_name, dtype = tvm.testing.parameters(
+    *[
+        (name, dtype)
+        for name, config in
ewise_operations.items() + for dtype in config.get("dtypes", ["float32"]) + ] +) + + +@tvm.testing.fixture(cache_return_value=True) +def ewise_ref_data(topi_name, dtype): + config = ewise_operations[topi_name] + + input_range = config["input_range"] + shape = config.get("shape", (20, 3)) + + a_np = np.random.uniform(*input_range, size=shape).astype(dtype) + + if dtype.startswith("float"): + if config.get("replace_with_nan", False): + a_np.ravel()[np.random.choice(a_np.size, int(a_np.size * 0.5), replace=False)] = np.nan + if config.get("replace_with_inf", False): + a_np.ravel()[ + np.random.choice(a_np.size, int(a_np.size * 0.5), replace=False) + ] = np.infty + + # avoid round check too close to boundary + if topi_name == "round": + a_np += ((np.abs(np.fmod(a_np, 1)) - 0.5) < 1e-6) * 1e-4 + + b_np = config["ref"](a_np) + + return a_np, b_np + + +def test_ewise(target, dev, topi_name, dtype, ewise_ref_data): + target = tvm.target.Target(target) + if target.kind.name == "vulkan" and topi_name in ["tan", "erf", "isnan", "isfinite", "isinf"]: + pytest.xfail(f"Vulkan runtime doesn't support {topi_name} yet") + + topi_op = ewise_operations[topi_name]["topi"] + skip_name_check = ewise_operations[topi_name].get("skip_name_check", False) + + m = te.var("m") + l = te.var("l") + A = te.placeholder((m, l), dtype=dtype, name="A") + + B = topi_op(A) + assert tuple(B.shape) == tuple(A.shape) + if not skip_name_check: + assert B.op.body[0].op.name == "tir." + topi_name + + a_np, b_np = ewise_ref_data + + with tvm.target.Target(target): + s = tvm.topi.testing.get_injective_schedule(target)(B) + foo = tvm.build(s, [A, B], target, name=topi_name) + a = tvm.nd.array(a_np, dev) + b = tvm.nd.array(np.zeros_like(b_np), dev) + foo(a, b) + tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5, atol=1e-5) + + +from_dtype, to_dtype = tvm.testing.parameters( + ("int32", "float32"), + ("int32", "float64"), + ("int32", "bool"), + ("float32", "int32"), + ("float32", "float64"), + ("float32", "bool"), + ("bool", "float32"), + ("bool", "int32"), +) + + +@tvm.testing.fixture(cache_return_value=True) +def cast_ref_data(from_dtype, to_dtype): + shape = (5, 4) + input_range = (-100, 100) + + if from_dtype == "bool": + a_np = np.random.choice([True, False], size=shape) + else: + a_np = np.random.uniform(*input_range, size=shape).astype(from_dtype) + + if to_dtype == "bool": + a_np = a_np - a_np[2, 3] + b_np = a_np.astype(to_dtype) + + return a_np, b_np + + +def test_cast(target, dev, cast_ref_data, from_dtype, to_dtype): + m = te.var("m") + l = te.var("l") + A = te.placeholder((m, l), dtype=from_dtype, name="A") + B = topi.cast(A, to_dtype) + + a_np, b_np = cast_ref_data + + with tvm.target.Target(target): + s = tvm.topi.testing.get_injective_schedule(target)(B) + foo = tvm.build(s, [A, B], target) + a = tvm.nd.array(a_np, dev) + b = tvm.nd.empty(b_np.shape, dtype=to_dtype, device=dev) + foo(a, b) + tvm.testing.assert_allclose(b.numpy(), b_np) if __name__ == "__main__": - test_util() - test_ewise() - test_cast() - test_fastmath() + sys.exit(pytest.main(sys.argv)) diff --git a/tests/python/topi/python/test_topi_reduce.py b/tests/python/topi/python/test_topi_reduce.py index 07656032f878..23d762c5002a 100644 --- a/tests/python/topi/python/test_topi_reduce.py +++ b/tests/python/topi/python/test_topi_reduce.py @@ -16,23 +16,76 @@ # under the License. 
"""Test code for reduce.""" import os +import sys + import numpy as np +import pytest + import tvm -from tvm import te -from tvm import topi import tvm.testing import tvm.topi.testing +from tvm import te, topi + +in_shape, axis, keepdims, reduce_type, dtype = tvm.testing.parameters( + ((32,), 0, False, "argmax", "float32"), + ((128, 24, 128, 24), (1, 2, 3), True, "sum", "float32"), + ((2, 3), None, True, "all", "bool"), + ((128, 24 * 128 * 24), (1,), False, "max", "float32"), + ((32, 128, 24), None, True, "sum", "float32"), + ((32, 128, 24), None, True, "all", "bool"), + ((128, 24, 128, 24), (0, 2), False, "min", "float32"), + ((32, 128), 1, True, "argmax", "float32"), + ((32, 24, 32, 24), 2, False, "argmin", "float32"), + ((31, 21, 15), None, True, "argmax", "float32"), + ((31, 21, 15), None, False, "sum", "float32"), + ((128, 24, 128, 24), (1, 2, 3), True, "sum", "float64"), + ((2, 3), None, True, "any", "bool"), + ((32, 128, 24), None, True, "any", "bool"), + ((1, 4, 7), 1, True, "any", "bool"), + ((128, 24, 128, 24), 2, False, "any", "bool"), +) + + +@tvm.testing.fixture(cache_return_value=True) +def ref_data(in_shape, axis, keepdims, reduce_type, dtype): + # Test + if dtype == "bool": + in_npy_map = in_npy = np.random.choice([True, False], size=in_shape) + else: + in_npy = np.random.uniform(-1, 1, size=in_shape).astype(dtype) + in_npy_map = np.sqrt(np.exp(in_npy)).astype(dtype) + + if reduce_type == "sum": + out_npy = in_npy_map.sum(axis=axis, keepdims=keepdims) + elif reduce_type == "all" and dtype == "bool": + out_npy = in_npy_map.all(axis=axis, keepdims=keepdims) + elif reduce_type == "any" and dtype == "bool": + out_npy = in_npy_map.any(axis=axis, keepdims=keepdims) + elif reduce_type == "max": + out_npy = in_npy_map.max(axis=axis, keepdims=keepdims) + elif reduce_type == "min": + out_npy = in_npy_map.min(axis=axis, keepdims=keepdims) + elif reduce_type == "argmax": + out_npy = _my_npy_argmax(in_npy_map, axis=axis, keepdims=keepdims) + elif reduce_type == "argmin": + out_npy = _my_npy_argmin(in_npy_map, axis=axis, keepdims=keepdims) + else: + raise NotImplementedError + + return in_npy, in_npy_map, out_npy + def _my_npy_argmax(arr, axis, keepdims): if not keepdims: return arr.argmax(axis=axis) else: - if axis is not None: + if axis is None: + out_shape = [1 for _ in arr.shape] + else: out_shape = list(arr.shape) out_shape[axis] = 1 - else: - out_shape = [1 for _ in range(len(arr.shape))] + return arr.argmax(axis=axis).reshape(out_shape) @@ -40,120 +93,72 @@ def _my_npy_argmin(arr, axis, keepdims): if not keepdims: return arr.argmin(axis=axis) else: - out_shape = list(arr.shape) - out_shape[axis] = 1 + if axis is None: + out_shape = [1 for _ in arr.shape] + else: + out_shape = list(arr.shape) + out_shape[axis] = 1 return arr.argmin(axis=axis).reshape(out_shape) -def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum", dtype="float32"): +def test_reduce_map(target, dev, ref_data, in_shape, axis, keepdims, reduce_type, dtype): + target = tvm.target.Target(target) + if target.kind.name == "vulkan" and reduce_type in ["sum", "any", "all"]: + pytest.xfail(f"Vulkan backend has known errors on {reduce_type}") + + in_npy, in_npy_map, out_npy = ref_data + # Build the logic and compile the function A = te.placeholder(shape=in_shape, name="A", dtype=dtype) A1 = topi.sqrt(topi.exp(A)) out_dtype = dtype - if type == "sum": + if reduce_type == "sum": B = topi.sum(A1, axis=axis, keepdims=keepdims) - elif type == "all": + elif reduce_type == "all": B = topi.all(A, axis=axis, 
keepdims=keepdims) - elif type == "any": + elif reduce_type == "any": B = topi.any(A, axis=axis, keepdims=keepdims) - elif type == "max": + elif reduce_type == "max": B = topi.max(A1, axis=axis, keepdims=keepdims) - elif type == "min": + elif reduce_type == "min": B = topi.min(A1, axis=axis, keepdims=keepdims) - elif type == "argmax": + elif reduce_type == "argmax": B = topi.argmax(A1, axis=axis, keepdims=keepdims) out_dtype = "int32" - elif type == "argmin": + elif reduce_type == "argmin": B = topi.argmin(A1, axis=axis, keepdims=keepdims) out_dtype = "int32" else: raise NotImplementedError - def check_device(device, dev): - print("Running on target: %s" % device) - with tvm.target.Target(device): - s = tvm.topi.testing.get_reduce_schedule(device)(B) + with tvm.target.Target(target): + s = tvm.topi.testing.get_reduce_schedule(target)(B) - foo = tvm.build(s, [A, B], device, name=type) - # Test - if dtype == "bool": - in_npy_map = in_npy = np.random.choice([True, False], size=in_shape) - else: - in_npy = np.random.uniform(-1, 1, size=in_shape).astype(dtype) - in_npy_map = np.sqrt(np.exp(in_npy)).astype(dtype) - - if type == "sum": - out_npy = in_npy_map.sum(axis=axis, keepdims=keepdims) - elif type == "all" and dtype == "bool": - out_npy = in_npy_map.all(axis=axis, keepdims=keepdims) - elif type == "any" and dtype == "bool": - out_npy = in_npy_map.any(axis=axis, keepdims=keepdims) - elif type == "max": - out_npy = in_npy_map.max(axis=axis, keepdims=keepdims) - elif type == "min": - out_npy = in_npy_map.min(axis=axis, keepdims=keepdims) - elif type == "argmax": - out_npy = _my_npy_argmax(in_npy_map, axis=axis, keepdims=keepdims) - elif type == "argmin": - out_npy = _my_npy_argmin(in_npy_map, axis=axis, keepdims=keepdims) - else: - raise NotImplementedError - data_tvm = tvm.nd.array(in_npy, device=dev) - out_tvm = tvm.nd.empty(shape=out_npy.shape, device=dev, dtype=out_dtype) - for _ in range(1): - foo(data_tvm, out_tvm) - if type == "argmax" or type == "argmin": - out_tvm_indices = out_tvm.numpy() - if keepdims: - out_tvm_indices = np.take(out_tvm_indices, indices=0, axis=axis) - if axis is None: - out_tvm_val = in_npy_map.ravel()[out_tvm_indices] - else: - other_indices = tuple(np.indices(in_shape[0:axis] + in_shape[(axis + 1) :])) - sel_indices = other_indices[0:axis] + (out_tvm_indices,) + other_indices[axis:] - out_tvm_val = in_npy_map[sel_indices] - if type == "argmax": - tvm.testing.assert_allclose(out_tvm_val, in_npy_map.max(axis=axis), 1e-3, 1e-3) - elif type == "argmin": - tvm.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1e-3, 1e-3) + foo = tvm.build(s, [A, B], target, name=reduce_type) + + data_tvm = tvm.nd.array(in_npy, device=dev) + out_tvm = tvm.nd.empty(shape=out_npy.shape, device=dev, dtype=out_dtype) + foo(data_tvm, out_tvm) + + if reduce_type == "argmax" or reduce_type == "argmin": + out_tvm_indices = out_tvm.numpy() + if keepdims: + out_tvm_indices = np.take(out_tvm_indices, indices=0, axis=axis) + if axis is None: + out_tvm_val = in_npy_map.ravel()[out_tvm_indices] else: - tvm.testing.assert_allclose(out_tvm.numpy(), out_npy, 1e-3, 1e-3) - - for device, dev in tvm.testing.enabled_targets(): - check_device(device, dev) - - -@tvm.testing.uses_gpu -def test_reduce_map(): - - verify_reduce_map_ele(in_shape=(32,), axis=0, keepdims=False, type="argmax") - verify_reduce_map_ele(in_shape=(128, 24, 128, 24), axis=(1, 2, 3), keepdims=True, type="sum") - verify_reduce_map_ele(in_shape=(2, 3), axis=None, keepdims=True, type="all", dtype="bool") - 
verify_reduce_map_ele(in_shape=(128, 24 * 128 * 24), axis=(1,), keepdims=False, type="max") - verify_reduce_map_ele(in_shape=(32, 128, 24), axis=None, keepdims=True, type="sum") - verify_reduce_map_ele( - in_shape=(32, 128, 24), axis=None, keepdims=True, dtype="bool", type="all" - ) - verify_reduce_map_ele(in_shape=(128, 24, 128, 24), axis=(0, 2), keepdims=False, type="min") - verify_reduce_map_ele(in_shape=(32, 128), axis=1, keepdims=True, type="argmax") - verify_reduce_map_ele(in_shape=(32, 24, 32, 24), axis=2, keepdims=False, type="argmin") - verify_reduce_map_ele(in_shape=(31, 21, 15), axis=None, keepdims=True, type="argmax") - verify_reduce_map_ele(in_shape=(31, 21, 15), axis=None, keepdims=False, type="sum") - verify_reduce_map_ele( - in_shape=(128, 24, 128, 24), axis=(1, 2, 3), keepdims=True, type="sum", dtype="float64" - ) - verify_reduce_map_ele(in_shape=(2, 3), axis=None, keepdims=True, type="any", dtype="bool") - verify_reduce_map_ele( - in_shape=(32, 128, 24), axis=None, keepdims=True, type="any", dtype="bool" - ) - verify_reduce_map_ele(in_shape=(1, 4, 7), axis=1, keepdims=True, type="any", dtype="bool") - verify_reduce_map_ele( - in_shape=(128, 24, 128, 24), axis=2, keepdims=False, type="any", dtype="bool" - ) - - -@tvm.testing.uses_gpu -def test_complex_reduce(): + other_indices = tuple(np.indices(in_shape[0:axis] + in_shape[(axis + 1) :])) + sel_indices = other_indices[0:axis] + (out_tvm_indices,) + other_indices[axis:] + out_tvm_val = in_npy_map[sel_indices] + if reduce_type == "argmax": + tvm.testing.assert_allclose(out_tvm_val, in_npy_map.max(axis=axis), 1e-3, 1e-3) + elif reduce_type == "argmin": + tvm.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1e-3, 1e-3) + else: + tvm.testing.assert_allclose(out_tvm.numpy(), out_npy, 1e-3, 1e-3) + + +def test_complex_reduce(target, dev): in_shape = (2, 3) dtype = "float32" axis = 0 @@ -163,20 +168,20 @@ def test_complex_reduce(): C = topi.add(B, B) D = topi.multiply(B, B) E = topi.add(C, D) - for device, dev in tvm.testing.enabled_targets(): - print("Running on target: %s" % device) - with tvm.target.Target(device): - s = tvm.topi.testing.get_reduce_schedule(device)(E) - foo = tvm.build(s, [A, E], device, name="sum") - in_npy = np.random.uniform(-1, 1, size=in_shape).astype(dtype) - sum_npy = in_npy.sum(axis=axis, keepdims=keepdims) - out_npy = sum_npy * 2 + sum_npy * sum_npy - data_tvm = tvm.nd.array(in_npy, device=dev) - out_tvm = tvm.nd.empty(shape=out_npy.shape, device=dev, dtype=dtype) - foo(data_tvm, out_tvm) - tvm.testing.assert_allclose(out_tvm.numpy(), out_npy, 1e-3, 1e-3) + + with tvm.target.Target(target): + s = tvm.topi.testing.get_reduce_schedule(target)(E) + foo = tvm.build(s, [A, E], target, name="sum") + + in_npy = np.random.uniform(-1, 1, size=in_shape).astype(dtype) + sum_npy = in_npy.sum(axis=axis, keepdims=keepdims) + out_npy = sum_npy * 2 + sum_npy * sum_npy + + data_tvm = tvm.nd.array(in_npy, device=dev) + out_tvm = tvm.nd.empty(shape=out_npy.shape, device=dev, dtype=dtype) + foo(data_tvm, out_tvm) + tvm.testing.assert_allclose(out_tvm.numpy(), out_npy, 1e-3, 1e-3) if __name__ == "__main__": - test_reduce_map() - test_complex_reduce() + sys.exit(pytest.main(sys.argv)) diff --git a/tests/python/topi/python/test_topi_softmax.py b/tests/python/topi/python/test_topi_softmax.py index 8af038a1f7ce..97fbedcc288e 100644 --- a/tests/python/topi/python/test_topi_softmax.py +++ b/tests/python/topi/python/test_topi_softmax.py @@ -15,14 +15,17 @@ # specific language governing permissions and limitations # 
under the License. """Test code for softmax""" +import logging import os +import sys + import numpy as np +import pytest + import tvm -from tvm import te -from tvm import topi import tvm.testing import tvm.topi.testing -import logging +from tvm import te, topi from tvm.topi.utils import get_const_tuple @@ -34,75 +37,72 @@ } -def check_target(A, B, a_np, b_np, target, dev, name): - print("Running on target: %s" % target) - with tvm.target.Target(target): - s_func = tvm.topi.testing.dispatch(target, _softmax_schedule) - s = s_func(B) - - a = tvm.nd.array(a_np, dev) - b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), dev) - f = tvm.build(s, [A, B], target, name=name) - f(a, b) - tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5) +dtype = tvm.testing.parameter("float32", "float64") -def verify_softmax(m, n, dtype="float32"): - A = te.placeholder((m, n), dtype=dtype, name="A") - B = topi.nn.softmax(A) - # confirm lower works - s = te.create_schedule([B.op]) - tvm.lower(s, [A, B], simple_mode=True) - - a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) - b_np = tvm.topi.testing.softmax_python(a_np) +configs = { + "softmax": { + "topi": topi.nn.softmax, + "ref": tvm.topi.testing.softmax_python, + "dimensions": [2, 4], + }, + "log_softmax": { + "topi": topi.nn.log_softmax, + "ref": tvm.topi.testing.log_softmax_python, + "dimensions": [2], + }, +} +shapes = [(32, 10), (3, 4), (1, 16, 256, 256)] +softmax_operation, shape = tvm.testing.parameters( + *[ + (name, shape) + for name, config in configs.items() + for shape in shapes + if len(shape) in config["dimensions"] + ] +) - for target, dev in tvm.testing.enabled_targets(): - check_target(A, B, a_np, b_np, target, dev, "softmax") +@tvm.testing.fixture(cache_return_value=True) +def ref_data(shape, dtype, softmax_operation): + ref_func = configs[softmax_operation]["ref"] -def verify_softmax_4d(shape, dtype="float32"): - A = te.placeholder(shape, dtype=dtype, name="A") - B = topi.nn.softmax(A, axis=1) + a_np = np.random.uniform(size=shape).astype(dtype) - _, c, h, w = shape - a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) - b_np = tvm.topi.testing.softmax_python(a_np.transpose(0, 2, 3, 1).reshape(h * w, c)) - b_np = b_np.reshape(1, h, w, c).transpose(0, 3, 1, 2) + if len(shape) == 2: + b_np = ref_func(a_np) + elif len(shape) == 4: + _, c, h, w = a_np.shape + a_np_2d = a_np.transpose(0, 2, 3, 1).reshape(h * w, c) + b_np_2d = tvm.topi.testing.softmax_python(a_np_2d) + b_np = b_np_2d.reshape(1, h, w, c).transpose(0, 3, 1, 2) - for target, dev in tvm.testing.enabled_targets(): - check_target(A, B, a_np, b_np, target, dev, "softmax") + return a_np, b_np -@tvm.testing.uses_gpu -def test_softmax(): - verify_softmax(32, 10) - verify_softmax(3, 4) - verify_softmax(32, 10, "float64") - verify_softmax_4d((1, 16, 256, 256)) +def test_softmax(target, dev, shape, dtype, ref_data, softmax_operation): + target = tvm.target.Target(target) + if target.kind.name == "vulkan" and dtype == "float64": + # https://www.khronos.org/registry/SPIR-V/specs/1.0/GLSL.std.450.html + pytest.xfail("Vulkan GLSL.std.450 does not support 64-bit floats") + A = te.placeholder(shape, dtype=dtype, name="A") -def verify_log_softmax(m, n, dtype="float32"): - A = te.placeholder((m, n), dtype=dtype, name="A") - B = topi.nn.log_softmax(A) - # confirm lower works - s = te.create_schedule([B.op]) - tvm.lower(s, [A, B], simple_mode=True) - a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) - b_np = 
tvm.topi.testing.log_softmax_python(a_np) + topi_op = configs[softmax_operation]["topi"] + B = topi_op(A, axis=1) - for target, dev in tvm.testing.enabled_targets(): - check_target(A, B, a_np, b_np, target, dev, "log_softmax") + with tvm.target.Target(target): + fschedule = tvm.topi.testing.dispatch(target, _softmax_schedule) + s = fschedule(B) + a_np, b_np = ref_data -@tvm.testing.uses_gpu -def test_log_softmax(): - verify_log_softmax(32, 10) - verify_log_softmax(3, 4) - verify_log_softmax(32, 10, "float64") + a = tvm.nd.array(a_np, dev) + b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), dev) + f = tvm.build(s, [A, B], target) + f(a, b) + tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5) if __name__ == "__main__": - logging.basicConfig(level=logging.DEBUG) - test_softmax() - test_log_softmax() + sys.exit(pytest.main(sys.argv)) diff --git a/tests/python/topi/python/test_topi_sort.py b/tests/python/topi/python/test_topi_sort.py index 65b2ae590308..43c6ce88be76 100644 --- a/tests/python/topi/python/test_topi_sort.py +++ b/tests/python/topi/python/test_topi_sort.py @@ -15,13 +15,16 @@ # specific language governing permissions and limitations # under the License. """Test code for vision package""" -from __future__ import print_function +import sys + import numpy as np +import pytest + import tvm -from tvm import te -from tvm import topi -import tvm.topi.testing import tvm.testing +import tvm.topi.testing + +from tvm import te, topi _sort_implement = { "generic": (topi.sort, topi.generic.schedule_sort), @@ -38,8 +41,17 @@ "gpu": (topi.cuda.topk, topi.cuda.schedule_topk), } +axis = tvm.testing.parameter(0, -1, 1) +is_ascend = tvm.testing.parameter(True, False, ids=["is_ascend", "not_ascend"]) +dtype = tvm.testing.parameter("int64", "float32") + +topk = tvm.testing.parameter(0, 1, 5) +topk_ret_type = tvm.testing.parameter("values", "indices", "both") + + +def test_sort(target, dev, axis, is_ascend): + np.random.seed(0) -def verify_sort(axis, is_ascend): dshape = (20, 100) data_dtype = "float32" data = te.placeholder(dshape, name="data", dtype=data_dtype) @@ -58,28 +70,19 @@ def verify_sort(axis, is_ascend): else: np_sort = np_sort[:, : dshape[axis]] - def check_target(target): - if not tvm.testing.device_enabled(target): - print("Skip because %s is not enabled" % target) - return - dev = tvm.device(target, 0) - print("Running on target: %s" % target) - with tvm.target.Target(target): - fcompute, fschedule = tvm.topi.testing.dispatch(target, _sort_implement) - out = fcompute(data, axis=axis, is_ascend=is_ascend) - s = fschedule(out) - - tvm_data = tvm.nd.array(np_data, dev) - tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data_dtype), dev) - f = tvm.build(s, [data, out], target) - f(tvm_data, tvm_out) - tvm.testing.assert_allclose(tvm_out.numpy(), np_sort, rtol=1e0) + with tvm.target.Target(target): + fcompute, fschedule = tvm.topi.testing.dispatch(target, _sort_implement) + out = fcompute(data, axis=axis, is_ascend=is_ascend) + s = fschedule(out) - for target in ["llvm", "cuda", "opencl", "vulkan", "nvptx"]: - check_target(target) + tvm_data = tvm.nd.array(np_data, dev) + tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data_dtype), dev) + f = tvm.build(s, [data, out], target) + f(tvm_data, tvm_out) + tvm.testing.assert_allclose(tvm_out.numpy(), np_sort, rtol=1e0) -def verify_argsort(axis, is_ascend): +def test_argsort(target, dev, axis, is_ascend): dshape = (20, 100) data_dtype = "float32" data = te.placeholder(dshape, name="data", dtype=data_dtype) @@ -98,28 +101,21 @@ def 
verify_argsort(axis, is_ascend): else: np_indices = np_indices[:, : dshape[axis]] - def check_target(target): - if not tvm.testing.device_enabled(target): - print("Skip because %s is not enabled" % target) - return - dev = tvm.device(target, 0) - print("Running on target: %s" % target) - with tvm.target.Target(target): - fcompute, fschedule = tvm.topi.testing.dispatch(target, _argsort_implement) - out = fcompute(data, axis=axis, is_ascend=is_ascend) - s = fschedule(out) + with tvm.target.Target(target): + fcompute, fschedule = tvm.topi.testing.dispatch(target, _argsort_implement) + out = fcompute(data, axis=axis, is_ascend=is_ascend) + s = fschedule(out) - tvm_data = tvm.nd.array(np_data, dev) - tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data_dtype), dev) - f = tvm.build(s, [data, out], target) - f(tvm_data, tvm_out) - tvm.testing.assert_allclose(tvm_out.numpy(), np_indices.astype(data_dtype), rtol=1e0) + tvm_data = tvm.nd.array(np_data, dev) + tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data_dtype), dev) + f = tvm.build(s, [data, out], target) + f(tvm_data, tvm_out) + tvm.testing.assert_allclose(tvm_out.numpy(), np_indices.astype(data_dtype), rtol=1e0) - for target in ["llvm", "cuda", "opencl", "vulkan", "nvptx"]: - check_target(target) +def test_topk(target, dev, topk, axis, topk_ret_type, is_ascend, dtype): + np.random.seed(0) -def verify_topk(k, axis, ret_type, is_ascend, dtype): shape = (20, 100) data_dtype = "float32" data = te.placeholder(shape, name="data", dtype=data_dtype) @@ -129,7 +125,7 @@ def verify_topk(k, axis, ret_type, is_ascend, dtype): np_indices = np.argsort(np_data, axis=axis) else: np_indices = np.argsort(-np_data, axis=axis) - kk = k if k >= 1 else shape[axis] + kk = topk if topk >= 1 else shape[axis] if axis == 0: np_indices = np_indices[:kk, :] np_values = np.zeros(np_indices.shape).astype(data_dtype) @@ -142,61 +138,25 @@ def verify_topk(k, axis, ret_type, is_ascend, dtype): np_values[i, :] = np_data[i, np_indices[i, :]] np_indices = np_indices.astype(dtype) - def check_target(target): - dev = tvm.device(target, 0) - if not tvm.testing.device_enabled(target): - print("Skip because %s is not enabled" % target) - return - print("Running on target: %s" % target) - with tvm.target.Target(target): - fcompute, fschedule = tvm.topi.testing.dispatch(target, _topk_implement) - outs = fcompute(data, k, axis, ret_type, is_ascend, dtype) - outs = outs if isinstance(outs, list) else [outs] - s = fschedule(outs) - tvm_data = tvm.nd.array(np_data, dev) - tvm_res = [] - for t in outs: - tvm_res.append(tvm.nd.empty(t.shape, dtype=t.dtype, device=dev)) - f = tvm.build(s, [data] + outs, target) - f(tvm_data, *tvm_res) - if ret_type == "both": - tvm.testing.assert_allclose(tvm_res[0].numpy(), np_values) - tvm.testing.assert_allclose(tvm_res[1].numpy(), np_indices) - elif ret_type == "values": - tvm.testing.assert_allclose(tvm_res[0].numpy(), np_values) - else: - tvm.testing.assert_allclose(tvm_res[0].numpy(), np_indices) - - for target in ["llvm", "cuda", "opencl", "vulkan", "nvptx"]: - check_target(target) - - -@tvm.testing.uses_gpu -def test_sort(): - np.random.seed(0) - for axis in [0, -1, 1]: - verify_sort(axis, True) - verify_sort(axis, False) - - -@tvm.testing.uses_gpu -def test_argsort(): - np.random.seed(0) - for axis in [0, -1, 1]: - verify_argsort(axis, True) - verify_argsort(axis, False) - - -@tvm.testing.uses_gpu -def test_topk(): - np.random.seed(0) - for k in [0, 1, 5]: - for axis in [0, -1, 1]: - for ret_type in ["both", "values", "indices"]: - verify_topk(k, 
axis, ret_type, True, "int64") - verify_topk(k, axis, ret_type, False, "float32") + with tvm.target.Target(target): + fcompute, fschedule = tvm.topi.testing.dispatch(target, _topk_implement) + outs = fcompute(data, topk, axis, topk_ret_type, is_ascend, dtype) + outs = outs if isinstance(outs, list) else [outs] + s = fschedule(outs) + tvm_data = tvm.nd.array(np_data, dev) + tvm_res = [] + for t in outs: + tvm_res.append(tvm.nd.empty(t.shape, dtype=t.dtype, device=dev)) + f = tvm.build(s, [data] + outs, target) + f(tvm_data, *tvm_res) + if topk_ret_type == "both": + tvm.testing.assert_allclose(tvm_res[0].numpy(), np_values) + tvm.testing.assert_allclose(tvm_res[1].numpy(), np_indices) + elif topk_ret_type == "values": + tvm.testing.assert_allclose(tvm_res[0].numpy(), np_values) + else: + tvm.testing.assert_allclose(tvm_res[0].numpy(), np_indices) if __name__ == "__main__": - test_argsort() - test_topk() + sys.exit(pytest.main(sys.argv)) diff --git a/tests/python/topi/python/test_topi_unique.py b/tests/python/topi/python/test_topi_unique.py index 3e26241cea94..4dd9b193ad57 100644 --- a/tests/python/topi/python/test_topi_unique.py +++ b/tests/python/topi/python/test_topi_unique.py @@ -20,9 +20,14 @@ from tvm import topi import tvm.topi.testing +in_dtype = tvm.testing.parameter("int32", "int64") +is_sorted = tvm.testing.parameter(True, False, ids=["sorted", "unsorted"]) +with_counts = tvm.testing.parameter(True, False, ids=["with_counts", "no_counts"]) +arr_size, maxval = tvm.testing.parameters((1, 100), (10, 10), (10000, 100)) + @tvm.testing.parametrize_targets -def test_unique(dev, target): +def test_unique(dev, target, in_dtype, is_sorted, with_counts, arr_size, maxval): def calc_numpy_unique(data, is_sorted=False): uniq, index, inverse, counts = np.unique( data, return_index=True, return_inverse=True, return_counts=True @@ -43,82 +48,67 @@ def calc_numpy_unique(data, is_sorted=False): num_uniq, ] - def check_unique(data, is_sorted=False, with_counts=False): - # numpy reference - np_unique, np_indices, np_inverse_indices, np_counts, np_num_unique = calc_numpy_unique( - data, is_sorted - ) - num_unique = np_num_unique[0] + data = np.random.randint(0, maxval, size=(arr_size)).astype(in_dtype) - implementations = { - "generic": ( - lambda x, return_counts: topi.unique(x, is_sorted, return_counts), - topi.generic.schedule_unique, - ), - "cuda": ( - lambda x, return_counts: topi.cuda.unique(x, is_sorted, return_counts), - topi.cuda.schedule_scan, - ), - "nvptx": ( - lambda x, return_counts: topi.cuda.unique(x, is_sorted, return_counts), - topi.cuda.schedule_scan, - ), - } - fcompute, fschedule = tvm.topi.testing.dispatch(target, implementations) - tvm_data = tvm.nd.array(data, device=dev) - tvm_unique = tvm.nd.array(np.zeros(data.shape).astype(data.dtype), device=dev) - tvm_indices = tvm.nd.array(np.zeros(data.shape).astype("int32"), device=dev) - tvm_inverse_indices = tvm.nd.array(np.zeros(data.shape).astype("int32"), device=dev) - tvm_num_unique = tvm.nd.array(np.zeros([1]).astype("int32"), device=dev) + # numpy reference + np_unique, np_indices, np_inverse_indices, np_counts, np_num_unique = calc_numpy_unique( + data, is_sorted + ) + num_unique = np_num_unique[0] - with tvm.target.Target(target): - te_input = tvm.te.placeholder(shape=data.shape, dtype=str(data.dtype)) - outs = fcompute(te_input, with_counts) - s = fschedule(outs) - func = tvm.build(s, [te_input, *outs]) + implementations = { + "generic": ( + lambda x, return_counts: topi.unique(x, is_sorted, return_counts), + 
topi.generic.schedule_unique, + ), + "gpu": ( + lambda x, return_counts: topi.cuda.unique(x, is_sorted, return_counts), + topi.cuda.schedule_scan, + ), + "nvptx": ( + lambda x, return_counts: topi.cuda.unique(x, is_sorted, return_counts), + topi.cuda.schedule_scan, + ), + } + fcompute, fschedule = tvm.topi.testing.dispatch(target, implementations) + tvm_data = tvm.nd.array(data, device=dev) + tvm_unique = tvm.nd.array(np.zeros(data.shape).astype(data.dtype), device=dev) + tvm_indices = tvm.nd.array(np.zeros(data.shape).astype("int32"), device=dev) + tvm_inverse_indices = tvm.nd.array(np.zeros(data.shape).astype("int32"), device=dev) + tvm_num_unique = tvm.nd.array(np.zeros([1]).astype("int32"), device=dev) - if with_counts: - tvm_counts = tvm.nd.array(np.zeros(data.shape).astype("int32"), device=dev) - func( - tvm_data, - tvm_unique, - tvm_indices, - tvm_inverse_indices, - tvm_num_unique, - tvm_counts, - ) - else: - func(tvm_data, tvm_unique, tvm_indices, tvm_inverse_indices, tvm_num_unique) + with tvm.target.Target(target): + te_input = tvm.te.placeholder(shape=data.shape, dtype=str(data.dtype)) + outs = fcompute(te_input, with_counts) + s = fschedule(outs) + func = tvm.build(s, [te_input, *outs]) - num_unique = np_num_unique[0] - assert tvm_num_unique.numpy()[0] == np_num_unique + if with_counts: + tvm_counts = tvm.nd.array(np.zeros(data.shape).astype("int32"), device=dev) + func( + tvm_data, + tvm_unique, + tvm_indices, + tvm_inverse_indices, + tvm_num_unique, + tvm_counts, + ) + else: + func(tvm_data, tvm_unique, tvm_indices, tvm_inverse_indices, tvm_num_unique) - np.testing.assert_allclose(tvm_unique.numpy()[:num_unique], np_unique, atol=1e-5, rtol=1e-5) - np.testing.assert_allclose( - tvm_indices.numpy()[:num_unique], np_indices, atol=1e-5, rtol=1e-5 - ) + num_unique = np_num_unique[0] + assert tvm_num_unique.numpy()[0] == np_num_unique - np.testing.assert_allclose( - tvm_inverse_indices.numpy(), np_inverse_indices, atol=1e-5, rtol=1e-5 - ) + np.testing.assert_allclose(tvm_unique.numpy()[:num_unique], np_unique, atol=1e-5, rtol=1e-5) + np.testing.assert_allclose(tvm_indices.numpy()[:num_unique], np_indices, atol=1e-5, rtol=1e-5) - if with_counts: - np.testing.assert_allclose( - tvm_counts.numpy()[:num_unique], np_counts, atol=1e-5, rtol=1e-5 - ) + np.testing.assert_allclose( + tvm_inverse_indices.numpy(), np_inverse_indices, atol=1e-5, rtol=1e-5 + ) - for in_dtype in ["int32", "int64"]: - for is_sorted in [True, False]: - for with_counts in [True, False]: - data = np.random.randint(0, 100, size=(1)).astype(in_dtype) - check_unique(data, is_sorted, with_counts) - data = np.random.randint(0, 10, size=(10)).astype(in_dtype) - check_unique(data, is_sorted, with_counts) - data = np.random.randint(0, 100, size=(10000)).astype(in_dtype) - check_unique(data, is_sorted, with_counts) + if with_counts: + np.testing.assert_allclose(tvm_counts.numpy()[:num_unique], np_counts, atol=1e-5, rtol=1e-5) if __name__ == "__main__": - test_unique(tvm.device("cpu"), tvm.target.Target("llvm")) - test_unique(tvm.device("cuda"), tvm.target.Target("cuda")) - test_unique(tvm.device("nvptx"), tvm.target.Target("nvptx")) + sys.exit(pytest.main(sys.argv)) diff --git a/tests/python/topi/python/test_topi_vision.py b/tests/python/topi/python/test_topi_vision.py index 234107d6686e..6ddb86f4027f 100644 --- a/tests/python/topi/python/test_topi_vision.py +++ b/tests/python/topi/python/test_topi_vision.py @@ -15,19 +15,18 @@ # specific language governing permissions and limitations # under the License. 
"""Test code for vision package""" -from __future__ import print_function import math +import sys + import numpy as np +import pytest + import tvm -from tvm import te -from tvm import topi +import tvm.testing import tvm.topi.testing -from tvm.contrib.pickle_memoize import memoize -from tvm.topi.utils import get_const_tuple +from tvm import te, topi from tvm.topi.vision import ssd, non_max_suppression, get_valid_counts -import pytest -import tvm.testing _get_valid_counts_implement = { "generic": (topi.vision.get_valid_counts, topi.generic.schedule_get_valid_counts), @@ -71,35 +70,46 @@ } -def verify_get_valid_counts(dshape, score_threshold, id_index, score_index): - dtype = "float32" - batch_size, num_anchor, elem_length = dshape - np_data = np.random.uniform(low=-2, high=2, size=dshape).astype(dtype) - np_out1 = np.zeros(shape=(batch_size,)) - np_out2 = np.zeros(shape=dshape).astype(dtype) - np_out3 = np.zeros(shape=(batch_size, num_anchor)) - for i in range(batch_size): - np_out1[i] = 0 - inter_idx = 0 - for j in range(num_anchor): - score = np_data[i, j, score_index] - if score > score_threshold and (id_index < 0 or np_data[i, j, id_index] >= 0): - for k in range(elem_length): - np_out2[i, inter_idx, k] = np_data[i, j, k] - np_out1[i] += 1 - np_out3[i, inter_idx] = j - inter_idx += 1 - if j >= np_out1[i]: - for k in range(elem_length): - np_out2[i, j, k] = -1.0 - np_out3[i, j] = -1 - - def check_device(target): - dev = tvm.device(target, 0) - if not tvm.testing.device_enabled(target): - print("Skip because %s is not enabled" % target) - return - print("Running on target: %s" % target) +class TestValidCounts: + dshape, score_threshold, id_index, score_index = tvm.testing.parameters( + ((1, 1000, 5), 0.5, -1, 0), + ((1, 2500, 6), 0, 0, 1), + ((1, 2500, 5), -1, -1, 0), + ((3, 1000, 6), 0.55, 1, 0), + ((16, 500, 5), 0.95, -1, 1), + ) + dtype = tvm.testing.parameter("float32") + + @tvm.testing.fixture(cache_return_value=True) + def ref_data(self, dtype, dshape, score_threshold, id_index, score_index): + batch_size, num_anchor, elem_length = dshape + np_data = np.random.uniform(low=-2, high=2, size=dshape).astype(dtype) + np_out1 = np.zeros(shape=(batch_size,)) + np_out2 = np.zeros(shape=dshape).astype(dtype) + np_out3 = np.zeros(shape=(batch_size, num_anchor)) + for i in range(batch_size): + np_out1[i] = 0 + inter_idx = 0 + for j in range(num_anchor): + score = np_data[i, j, score_index] + if score > score_threshold and (id_index < 0 or np_data[i, j, id_index] >= 0): + for k in range(elem_length): + np_out2[i, inter_idx, k] = np_data[i, j, k] + np_out1[i] += 1 + np_out3[i, inter_idx] = j + inter_idx += 1 + if j >= np_out1[i]: + for k in range(elem_length): + np_out2[i, j, k] = -1.0 + np_out3[i, j] = -1 + + return np_data, np_out1, np_out2, np_out3 + + def test_get_valid_counts( + self, target, dev, ref_data, dtype, dshape, score_threshold, id_index, score_index + ): + np_data, np_out1, np_out2, np_out3 = ref_data + with tvm.target.Target(target): fcompute, fschedule = tvm.topi.testing.dispatch(target, _get_valid_counts_implement) data = te.placeholder(dshape, name="data", dtype=dtype) @@ -117,20 +127,10 @@ def check_device(target): tvm.testing.assert_allclose(tvm_out2.numpy(), np_out2, rtol=1e-3) tvm.testing.assert_allclose(tvm_out3.numpy(), np_out3, rtol=1e-3) - for target in ["llvm", "cuda", "opencl", "vulkan"]: - check_device(target) - - -@tvm.testing.uses_gpu -def test_get_valid_counts(): - verify_get_valid_counts((1, 1000, 5), 0.5, -1, 0) - verify_get_valid_counts((1, 2500, 6), 0, 0, 1) 
- verify_get_valid_counts((1, 2500, 5), -1, -1, 0) - verify_get_valid_counts((3, 1000, 6), 0.55, 1, 0) - verify_get_valid_counts((16, 500, 5), 0.95, -1, 1) - def verify_non_max_suppression( + target, + dev, np_data, np_valid_count, np_indices, @@ -151,63 +151,53 @@ def verify_non_max_suppression( valid_count = te.placeholder((batch,), dtype="int32", name="valid_count") indices = te.placeholder((batch, num_anchors), dtype="int32", name="indices") - def check_device(target): - dev = tvm.device(target, 0) - if not tvm.testing.device_enabled(target): - print("Skip because %s is not enabled" % target) - return - print("Running on target: %s" % target) - with tvm.target.Target(target): - fcompute, fschedule = tvm.topi.testing.dispatch(target, _nms_implement) - out = fcompute( - data, - valid_count, - indices, - max_output_size, - iou_threshold, - force_suppress, - top_k, - coord_start=coord_start, - score_index=score_index, - id_index=id_index, - return_indices=False, - ) - indices_out = fcompute( - data, - valid_count, - indices, - max_output_size, - iou_threshold, - force_suppress, - top_k, - coord_start=coord_start, - score_index=score_index, - id_index=id_index, - return_indices=True, - ) - s = fschedule(out) - indices_s = fschedule(indices_out) - - tvm_data = tvm.nd.array(np_data, dev) - tvm_valid_count = tvm.nd.array(np_valid_count, dev) - tvm_indices = tvm.nd.array(np_indices, dev) + with tvm.target.Target(target): + fcompute, fschedule = tvm.topi.testing.dispatch(target, _nms_implement) + out = fcompute( + data, + valid_count, + indices, + max_output_size, + iou_threshold, + force_suppress, + top_k, + coord_start=coord_start, + score_index=score_index, + id_index=id_index, + return_indices=False, + ) + indices_out = fcompute( + data, + valid_count, + indices, + max_output_size, + iou_threshold, + force_suppress, + top_k, + coord_start=coord_start, + score_index=score_index, + id_index=id_index, + return_indices=True, + ) + s = fschedule(out) + indices_s = fschedule(indices_out) - tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data.dtype), dev) - f = tvm.build(s, [data, valid_count, indices, out], target) - f(tvm_data, tvm_valid_count, tvm_indices, tvm_out) - tvm.testing.assert_allclose(tvm_out.numpy(), np_result, rtol=1e-4) + tvm_data = tvm.nd.array(np_data, dev) + tvm_valid_count = tvm.nd.array(np_valid_count, dev) + tvm_indices = tvm.nd.array(np_indices, dev) - tvm_indices_out = tvm.nd.array(np.zeros(indices_dshape, dtype="int32"), dev) - f = tvm.build(indices_s, [data, valid_count, indices, indices_out[0]], target) - f(tvm_data, tvm_valid_count, tvm_indices, tvm_indices_out) - tvm.testing.assert_allclose(tvm_indices_out.numpy(), np_indices_result, rtol=1e-4) + tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data.dtype), dev) + f = tvm.build(s, [data, valid_count, indices, out], target) + f(tvm_data, tvm_valid_count, tvm_indices, tvm_out) + tvm.testing.assert_allclose(tvm_out.numpy(), np_result, rtol=1e-4) - for target in ["llvm", "cuda", "opencl", "nvptx"]: - check_device(target) + tvm_indices_out = tvm.nd.array(np.zeros(indices_dshape, dtype="int32"), dev) + f = tvm.build(indices_s, [data, valid_count, indices, indices_out[0]], target) + f(tvm_data, tvm_valid_count, tvm_indices, tvm_indices_out) + tvm.testing.assert_allclose(tvm_indices_out.numpy(), np_indices_result, rtol=1e-4) -@tvm.testing.uses_gpu -def test_non_max_suppression(): +def test_non_max_suppression(target, dev): np_data = np.array( [ [ @@ -236,6 +226,8 @@ def test_non_max_suppression(): np_indices_result = np.array([[3, 
0, -1, -1, -1]]) verify_non_max_suppression( + target, + dev, np_data, np_valid_count, np_indices, @@ -277,6 +269,8 @@ def test_non_max_suppression(): ) np_indices_result = np.array([[3, 0, -1, -1, -1]]) verify_non_max_suppression( + target, + dev, np_data, np_valid_count, np_indices, @@ -292,91 +286,85 @@ def test_non_max_suppression(): ) -def verify_multibox_prior( - dshape, sizes=(1,), ratios=(1,), steps=(-1, -1), offsets=(0.5, 0.5), clip=False -): - data = te.placeholder(dshape, name="data") +class TestMultiboxPrior: + dshape, sizes, ratios, steps, offsets, clip = tvm.testing.parameters( + ((1, 3, 50, 50), (1,), (1,), (-1, -1), (0.5, 0.5), False), + ((1, 3, 224, 224), (0.5, 0.25, 0.1), (1, 2, 0.5), (-1, -1), (0.5, 0.5), False), + ((1, 32, 32, 32), (0.5, 0.25), (1, 2), (2, 2), (0.5, 0.5), True), + ) - dtype = data.dtype - input_data = np.random.uniform(size=dshape).astype(dtype) - - in_height = data.shape[2].value - in_width = data.shape[3].value - num_sizes = len(sizes) - num_ratios = len(ratios) - size_ratio_concat = sizes + ratios - steps_h = steps[0] if steps[0] > 0 else 1.0 / in_height - steps_w = steps[1] if steps[1] > 0 else 1.0 / in_width - offset_h = offsets[0] - offset_w = offsets[1] - - oshape = (1, in_height * in_width * (num_sizes + num_ratios - 1), 4) - np_out = np.zeros(oshape).astype(dtype) - - for i in range(in_height): - center_h = (i + offset_h) * steps_h - for j in range(in_width): - center_w = (j + offset_w) * steps_w - for k in range(num_sizes + num_ratios - 1): - w = ( - size_ratio_concat[k] * in_height / in_width / 2.0 - if k < num_sizes - else size_ratio_concat[0] - * in_height - / in_width - * math.sqrt(size_ratio_concat[k + 1]) - / 2.0 - ) - h = ( - size_ratio_concat[k] / 2.0 - if k < num_sizes - else size_ratio_concat[0] / math.sqrt(size_ratio_concat[k + 1]) / 2.0 - ) - count = ( - i * in_width * (num_sizes + num_ratios - 1) - + j * (num_sizes + num_ratios - 1) - + k - ) - np_out[0][count][0] = center_w - w - np_out[0][count][1] = center_h - h - np_out[0][count][2] = center_w + w - np_out[0][count][3] = center_h + h - if clip: - np_out = np.clip(np_out, 0, 1) - - def check_device(target): - dev = tvm.device(target, 0) - if not tvm.testing.device_enabled(target): - print("Skip because %s is not enabled" % target) - return - print("Running on target: %s" % target) + dtype = tvm.testing.parameter("float32") + + @tvm.testing.fixture(cache_return_value=True) + def ref_data(self, dtype, dshape, sizes, ratios, offsets, steps, clip): + in_height = dshape[2] + in_width = dshape[3] + num_sizes = len(sizes) + num_ratios = len(ratios) + size_ratio_concat = sizes + ratios + steps_h = steps[0] if steps[0] > 0 else 1.0 / in_height + steps_w = steps[1] if steps[1] > 0 else 1.0 / in_width + offset_h = offsets[0] + offset_w = offsets[1] + + out_shape = (1, in_height * in_width * (num_sizes + num_ratios - 1), 4) + + np_in = np.random.uniform(size=dshape).astype(dtype) + np_out = np.zeros(out_shape).astype(dtype) + + for i in range(in_height): + center_h = (i + offset_h) * steps_h + for j in range(in_width): + center_w = (j + offset_w) * steps_w + for k in range(num_sizes + num_ratios - 1): + w = ( + size_ratio_concat[k] * in_height / in_width / 2.0 + if k < num_sizes + else size_ratio_concat[0] + * in_height + / in_width + * math.sqrt(size_ratio_concat[k + 1]) + / 2.0 + ) + h = ( + size_ratio_concat[k] / 2.0 + if k < num_sizes + else size_ratio_concat[0] / math.sqrt(size_ratio_concat[k + 1]) / 2.0 + ) + count = ( + i * in_width * (num_sizes + num_ratios - 1) + + j * (num_sizes 
+ num_ratios - 1) + + k + ) + np_out[0][count][0] = center_w - w + np_out[0][count][1] = center_h - h + np_out[0][count][2] = center_w + w + np_out[0][count][3] = center_h + h + if clip: + np_out = np.clip(np_out, 0, 1) + + return np_in, np_out + + def test_multibox_prior( + self, target, dev, dtype, dshape, ref_data, sizes, ratios, steps, offsets, clip + ): + np_in, np_out = ref_data + + data = te.placeholder(dshape, name="data", dtype=dtype) fcompute, fschedule = tvm.topi.testing.dispatch(target, _multibox_prior_implement) with tvm.target.Target(target): out = fcompute(data, sizes, ratios, steps, offsets, clip) s = fschedule(out) - tvm_input_data = tvm.nd.array(input_data, dev) - tvm_out = tvm.nd.array(np.zeros(oshape, dtype=dtype), dev) + tvm_input_data = tvm.nd.array(np_in, dev) + tvm_out = tvm.nd.array(np.zeros(np_out.shape, dtype=dtype), dev) f = tvm.build(s, [data, out], target) f(tvm_input_data, tvm_out) tvm.testing.assert_allclose(tvm_out.numpy(), np_out, rtol=1e-3) - for target in ["llvm", "opencl", "cuda"]: - check_device(target) - - -@tvm.testing.uses_gpu -def test_multibox_prior(): - verify_multibox_prior((1, 3, 50, 50)) - verify_multibox_prior((1, 3, 224, 224), sizes=(0.5, 0.25, 0.1), ratios=(1, 2, 0.5)) - verify_multibox_prior( - (1, 32, 32, 32), sizes=(0.5, 0.25), ratios=(1, 2), steps=(2, 2), clip=True - ) - -@tvm.testing.uses_gpu -def test_multibox_detection(): +def test_multibox_detection(target, dev): batch_size = 1 num_anchors = 3 num_classes = 3 @@ -399,41 +387,56 @@ def test_multibox_detection(): ] ) - def check_device(target): - dev = tvm.device(target, 0) - if not tvm.testing.device_enabled(target): - print("Skip because %s is not enabled" % target) - return - print("Running on target: %s" % target) - - fcompute, fschedule = tvm.topi.testing.dispatch(target, _multibox_detection_implement) - with tvm.target.Target(target): - out = fcompute(cls_prob, loc_preds, anchors) - s = fschedule(out) - - tvm_cls_prob = tvm.nd.array(np_cls_prob.astype(cls_prob.dtype), dev) - tvm_loc_preds = tvm.nd.array(np_loc_preds.astype(loc_preds.dtype), dev) - tvm_anchors = tvm.nd.array(np_anchors.astype(anchors.dtype), dev) - tvm_out = tvm.nd.array(np.zeros((batch_size, num_anchors, 6)).astype(out.dtype), dev) - f = tvm.build(s, [cls_prob, loc_preds, anchors, out], target) - f(tvm_cls_prob, tvm_loc_preds, tvm_anchors, tvm_out) - tvm.testing.assert_allclose(tvm_out.numpy(), expected_np_out, rtol=1e-4) - - for target in ["llvm", "opencl", "cuda"]: - check_device(target) - - -def verify_roi_align( - batch, in_channel, in_size, num_roi, pooled_size, spatial_scale, sample_ratio, mode -): # For mode, 0 = avg, 1 = max - a_shape = (batch, in_channel, in_size, in_size) - rois_shape = (num_roi, 5) + fcompute, fschedule = tvm.topi.testing.dispatch(target, _multibox_detection_implement) + with tvm.target.Target(target): + out = fcompute(cls_prob, loc_preds, anchors) + s = fschedule(out) + + tvm_cls_prob = tvm.nd.array(np_cls_prob.astype(cls_prob.dtype), dev) + tvm_loc_preds = tvm.nd.array(np_loc_preds.astype(loc_preds.dtype), dev) + tvm_anchors = tvm.nd.array(np_anchors.astype(anchors.dtype), dev) + tvm_out = tvm.nd.array(np.zeros((batch_size, num_anchors, 6)).astype(out.dtype), dev) + f = tvm.build(s, [cls_prob, loc_preds, anchors, out], target) + f(tvm_cls_prob, tvm_loc_preds, tvm_anchors, tvm_out) + tvm.testing.assert_allclose(tvm_out.numpy(), expected_np_out, rtol=1e-4) + + +class TestRoiAlign: + ( + batch, + in_channel, + in_size, + num_roi, + pooled_size, + spatial_scale, + sample_ratio, + mode, 
+ ) = tvm.testing.parameters( + (1, 16, 32, 64, 7, 1.0, -1, 0), + (4, 16, 32, 64, 7, 0.5, 2, 0), + (1, 32, 32, 80, 8, 0.0625, 2, 0), + (1, 32, 500, 80, 8, 0.0625, 2, 0), + (1, 16, 32, 64, 7, 1.0, -1, 1), + (4, 16, 32, 64, 7, 0.5, 2, 1), + (1, 32, 32, 80, 8, 0.0625, 2, 1), + (1, 32, 500, 80, 8, 0.0625, 2, 1), + ) - a = te.placeholder(a_shape) - rois = te.placeholder(rois_shape) + @tvm.testing.fixture(cache_return_value=True) + def ref_data( + self, + batch, + in_channel, + in_size, + num_roi, + pooled_size, + spatial_scale, + sample_ratio, + mode, + ): + a_shape = (batch, in_channel, in_size, in_size) + rois_shape = (num_roi, 5) - @memoize("topi.tests.test_topi_vision.verify_roi_align") - def get_ref_data(): a_np = np.random.uniform(-1, 1, size=a_shape).astype("float32") rois_np = np.random.uniform(-1, 1, size=rois_shape).astype("float32") * in_size rois_np[:, 0] = np.random.randint(low=0, high=batch, size=num_roi) @@ -448,13 +451,22 @@ def get_ref_data(): return a_np, rois_np, b_np - a_np, rois_np, b_np = get_ref_data() + def test_roi_align( + self, + target, + dev, + ref_data, + pooled_size, + spatial_scale, + sample_ratio, + mode, + ): + # For mode, 0 = avg, 1 = max + a_np, rois_np, b_np = ref_data + + a = te.placeholder(a_np.shape) + rois = te.placeholder(rois_np.shape) - def check_device(target): - dev = tvm.device(target, 0) - if not tvm.testing.device_enabled(target): - print("Skip because %s is not enabled" % target) - return with tvm.target.Target(target): fcompute, fschedule = tvm.topi.testing.dispatch(target, _roi_align_implement) b = fcompute( @@ -469,37 +481,24 @@ def check_device(target): tvm_a = tvm.nd.array(a_np, dev) tvm_rois = tvm.nd.array(rois_np, dev) - tvm_b = tvm.nd.array(np.zeros(get_const_tuple(b.shape), dtype=b.dtype), device=dev) + tvm_b = tvm.nd.array(np.zeros(b_np.shape, dtype=b.dtype), device=dev) f = tvm.build(s, [a, rois, b], target) f(tvm_a, tvm_rois, tvm_b) tvm_val = tvm_b.numpy() tvm.testing.assert_allclose(tvm_val, b_np, rtol=1e-3, atol=1e-4) - for target in ["llvm", "cuda", "opencl"]: - check_device(target) - -@tvm.testing.uses_gpu -def test_roi_align(): - verify_roi_align(1, 16, 32, 64, 7, 1.0, -1, 0) - verify_roi_align(4, 16, 32, 64, 7, 0.5, 2, 0) - verify_roi_align(1, 32, 32, 80, 8, 0.0625, 2, 0) - verify_roi_align(1, 32, 500, 80, 8, 0.0625, 2, 0) - verify_roi_align(1, 16, 32, 64, 7, 1.0, -1, 1) - verify_roi_align(4, 16, 32, 64, 7, 0.5, 2, 1) - verify_roi_align(1, 32, 32, 80, 8, 0.0625, 2, 1) - verify_roi_align(1, 32, 500, 80, 8, 0.0625, 2, 1) - - -def verify_roi_pool(batch, in_channel, in_size, num_roi, pooled_size, spatial_scale): - a_shape = (batch, in_channel, in_size, in_size) - rois_shape = (num_roi, 5) +class TestRoiPool: + batch, in_channel, in_size, num_roi, pooled_size, spatial_scale = tvm.testing.parameters( + (1, 4, 16, 32, 7, 1.0), + (4, 4, 16, 32, 7, 0.5), + ) - a = te.placeholder(a_shape) - rois = te.placeholder(rois_shape) + @tvm.testing.fixture(cache_return_value=True) + def ref_data(self, batch, in_channel, in_size, num_roi, pooled_size, spatial_scale): + a_shape = (batch, in_channel, in_size, in_size) + rois_shape = (num_roi, 5) - @memoize("topi.tests.test_topi_vision.verify_roi_pool") - def get_ref_data(): a_np = np.random.uniform(size=a_shape).astype("float32") rois_np = np.random.uniform(size=rois_shape).astype("float32") * in_size rois_np[:, 0] = np.random.randint(low=0, high=batch, size=num_roi).astype("float32") @@ -509,14 +508,11 @@ def get_ref_data(): ) return a_np, rois_np, b_np - a_np, rois_np, b_np = get_ref_data() + def 
test_roi_pool(self, target, dev, ref_data, pooled_size, spatial_scale): + a_np, rois_np, b_np = ref_data - def check_device(target): - dev = tvm.device(target, 0) - if not tvm.testing.device_enabled(target): - print("Skip because %s is not enabled" % target) - return - print("Running on target: %s" % target) + a = te.placeholder(a_np.shape) + rois = te.placeholder(rois_np.shape) with tvm.target.Target(target): b = topi.vision.rcnn.roi_pool_nchw( @@ -527,50 +523,32 @@ def check_device(target): tvm_a = tvm.nd.array(a_np, dev) tvm_rois = tvm.nd.array(rois_np, dev) - tvm_b = tvm.nd.array(np.zeros(get_const_tuple(b.shape), dtype=b.dtype), device=dev) + tvm_b = tvm.nd.array(np.zeros(b_np.shape, dtype=b.dtype), device=dev) f = tvm.build(s, [a, rois, b], target) f(tvm_a, tvm_rois, tvm_b) tvm.testing.assert_allclose(tvm_b.numpy(), b_np, rtol=1e-4) - for target in ["cuda", "llvm"]: - check_device(target) - -@tvm.testing.uses_gpu -def test_roi_pool(): - verify_roi_pool(1, 4, 16, 32, 7, 1.0) - verify_roi_pool(4, 4, 16, 32, 7, 0.5) - - -def verify_proposal(np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs): +def verify_proposal(target, dev, np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs): cls_prob = te.placeholder(np_cls_prob.shape) bbox_pred = te.placeholder(np_bbox_pred.shape) im_info = te.placeholder(np_im_info.shape) - def check_device(target): - dev = tvm.device(target, 0) - if not tvm.testing.device_enabled(target): - print("Skip because %s is not enabled" % target) - return - print("Running on target: %s" % target) - with tvm.target.Target(target): - fcompute, fschedule = tvm.topi.testing.dispatch(target, _proposal_implement) - out = fcompute(cls_prob, bbox_pred, im_info, **attrs) - s = fschedule(out) - f = tvm.build(s, [cls_prob, bbox_pred, im_info, out], target) - tvm_cls_prob = tvm.nd.array(np_cls_prob, device=dev) - tvm_bbox_pred = tvm.nd.array(np_bbox_pred, device=dev) - tvm_im_info = tvm.nd.array(np_im_info, device=dev) - tvm_out = tvm.nd.empty(device=dev, shape=out.shape, dtype=out.dtype) - f(tvm_cls_prob, tvm_bbox_pred, tvm_im_info, tvm_out) - tvm.testing.assert_allclose(tvm_out.numpy(), np_out, rtol=1e-4) - - for target in ["llvm", "cuda"]: - check_device(target) - - -@tvm.testing.uses_gpu -def test_proposal(): + with tvm.target.Target(target): + fcompute, fschedule = tvm.topi.testing.dispatch(target, _proposal_implement) + out = fcompute(cls_prob, bbox_pred, im_info, **attrs) + s = fschedule(out) + f = tvm.build(s, [cls_prob, bbox_pred, im_info, out], target) + tvm_cls_prob = tvm.nd.array(np_cls_prob, device=dev) + tvm_bbox_pred = tvm.nd.array(np_bbox_pred, device=dev) + tvm_im_info = tvm.nd.array(np_im_info, device=dev) + tvm_out = tvm.nd.empty(device=dev, shape=out.shape, dtype=out.dtype) + f(tvm_cls_prob, tvm_bbox_pred, tvm_im_info, tvm_out) + tvm.testing.assert_allclose(tvm_out.numpy(), np_out, rtol=1e-4) + + +@tvm.testing.known_failing_targets("vulkan") +def test_proposal(target, dev): attrs = { "scales": (0.5,), "ratios": (0.5,), @@ -612,7 +590,7 @@ def test_proposal(): dtype="float32", ) - verify_proposal(np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs) + verify_proposal(target, dev, np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs) np_out = np.array( [ @@ -625,10 +603,12 @@ def test_proposal(): ) attrs["iou_loss"] = True - verify_proposal(np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs) + verify_proposal(target, dev, np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs) def verify_all_class_non_max_suppression( + target, + dev, boxes_np, scores_np, 
max_output_boxes_per_class, @@ -642,36 +622,24 @@ def verify_all_class_non_max_suppression( boxes = te.placeholder(dshape, name="boxes") scores = te.placeholder(scores_np.shape, dtype="float32", name="scores") - def check_device(target): - dev = tvm.device(target, 0) - if not tvm.testing.device_enabled(target): - print("Skip because %s is not enabled" % target) - return - print("Running on target: %s" % target) - with tvm.target.Target(target): - fcompute, fschedule = tvm.topi.testing.dispatch(target, _all_class_nms_implement) - out = fcompute( - boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold - ) - s = fschedule(out) - - tvm_boxes = tvm.nd.array(boxes_np, dev) - tvm_scores = tvm.nd.array(scores_np, dev) - selected_indices = tvm.nd.array(np.zeros((batch * num_class * num_boxes, 3), "int64"), dev) - num_detections = tvm.nd.array(np.zeros((1,), "int64"), dev) + with tvm.target.Target(target): + fcompute, fschedule = tvm.topi.testing.dispatch(target, _all_class_nms_implement) + out = fcompute(boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold) + s = fschedule(out) - f = tvm.build(s, [boxes, scores, out[0], out[1]], target) - f(tvm_boxes, tvm_scores, selected_indices, num_detections) + tvm_boxes = tvm.nd.array(boxes_np, dev) + tvm_scores = tvm.nd.array(scores_np, dev) + selected_indices = tvm.nd.array(np.zeros((batch * num_class * num_boxes, 3), "int64"), dev) + num_detections = tvm.nd.array(np.zeros((1,), "int64"), dev) - tvm_res = selected_indices.numpy()[: num_detections.numpy()[0]] - np.testing.assert_equal(tvm_res, expected_indices) + f = tvm.build(s, [boxes, scores, out[0], out[1]], target) + f(tvm_boxes, tvm_scores, selected_indices, num_detections) - for target in ["llvm", "cuda", "opencl", "vulkan"]: - check_device(target) + tvm_res = selected_indices.numpy()[: num_detections.numpy()[0]] + np.testing.assert_equal(tvm_res, expected_indices) -@tvm.testing.uses_gpu -def test_all_class_non_max_suppression(): +def test_all_class_non_max_suppression(target, dev): boxes = np.array( [ [ @@ -707,7 +675,14 @@ def test_all_class_non_max_suppression(): ) verify_all_class_non_max_suppression( - boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, expected + target, + dev, + boxes, + scores, + max_output_boxes_per_class, + iou_threshold, + score_threshold, + expected, ) boxes = np.array( @@ -730,16 +705,16 @@ def test_all_class_non_max_suppression(): expected = np.array([[0, 0, 3], [0, 0, 0]]) verify_all_class_non_max_suppression( - boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, expected + target, + dev, + boxes, + scores, + max_output_boxes_per_class, + iou_threshold, + score_threshold, + expected, ) if __name__ == "__main__": - test_get_valid_counts() - test_multibox_prior() - test_multibox_detection() - test_roi_align() - test_roi_pool() - test_proposal() - test_non_max_suppression() - test_all_class_non_max_suppression() + sys.exit(pytest.main(sys.argv)) diff --git a/tests/python/unittest/test_tir_intrin.py b/tests/python/unittest/test_tir_intrin.py index a8d57b3da780..ecc30199c1a7 100644 --- a/tests/python/unittest/test_tir_intrin.py +++ b/tests/python/unittest/test_tir_intrin.py @@ -139,7 +139,19 @@ def test_ldexp(): tvm.testing.assert_allclose(c.numpy(), np.ldexp(a.numpy(), b.numpy()), atol=1e-5, rtol=1e-5) -def test_clz(): +dtype = tvm.testing.parameter("int32", "int64") + + +@tvm.testing.parametrize_targets("llvm", "vulkan -from_device=0") +def test_clz(target, dev, dtype): + target = 
tvm.target.Target(target) + if ( + target.kind.name == "vulkan" + and dtype == "int64" + and not target.attrs.get("supports_int64", False) + ): + pytest.xfail("Vulkan target does not support Int64 types") + def clz_np(x, dtype): ceil_log2 = np.ceil(np.log2(x)).astype(dtype) bits = int(dtype[-2:]) @@ -147,38 +159,32 @@ def clz_np(x, dtype): clz[np.bitwise_and(x, x - 1) == 0] -= 1 return clz - for target in ["llvm", "vulkan"]: - if not tvm.testing.device_enabled("vulkan"): - continue + m = te.var("m") + A = te.placeholder((m,), name="A", dtype=dtype) + B = te.compute((m,), lambda *i: tvm.tir.clz(A(*i)), name="B") + s = te.create_schedule(B.op) - for dtype in ["int32", "int64"]: - m = te.var("m") - A = te.placeholder((m,), name="A", dtype=dtype) - B = te.compute((m,), lambda *i: tvm.tir.clz(A(*i)), name="B") - s = te.create_schedule(B.op) + if target.kind.name == "vulkan": + bx, tx = s[B].split(B.op.axis[0], factor=64) - if target == "vulkan": - bx, tx = s[B].split(B.op.axis[0], factor=64) + s[B].bind(bx, te.thread_axis("blockIdx.x")) + s[B].bind(tx, te.thread_axis("threadIdx.x")) - s[B].bind(bx, te.thread_axis("blockIdx.x")) - s[B].bind(tx, te.thread_axis("threadIdx.x")) - - f = tvm.build(s, [A, B], target) - dev = tvm.device(target, 0) - n = 10 + f = tvm.build(s, [A, B], target) + n = 10 - highs = [10, 100, 1000, 10000, 100000, 1000000] + highs = [10, 100, 1000, 10000, 100000, 1000000] - if dtype == "int64": - highs.append((1 << 63) - 1) + if dtype == "int64": + highs.append((1 << 63) - 1) - for high in highs: - a_np = np.random.randint(1, high=high, size=(n,)).astype(dtype) - a = tvm.nd.array(a_np, dev) - b = tvm.nd.array(np.zeros((n,)).astype("int32"), dev) - f(a, b) - ref = clz_np(a_np, dtype) - np.testing.assert_equal(b.numpy(), ref) + for high in highs: + a_np = np.random.randint(1, high=high, size=(n,), dtype=dtype) + a = tvm.nd.array(a_np, dev) + b = tvm.nd.array(np.zeros((n,)).astype("int32"), dev) + f(a, b) + ref = clz_np(a_np, dtype) + np.testing.assert_equal(b.numpy(), ref) @tvm.script.tir diff --git a/tests/python/unittest/test_tvm_testing_features.py b/tests/python/unittest/test_tvm_testing_features.py index 4c9c5d91901a..f396eeeee5fb 100644 --- a/tests/python/unittest/test_tvm_testing_features.py +++ b/tests/python/unittest/test_tvm_testing_features.py @@ -57,7 +57,10 @@ def test_explicit_list(self, target): self.targets_with_explicit_list.append(target) def test_no_repeats_in_explicit_list(self): - assert self.targets_with_explicit_list == ["llvm"] + if tvm.testing.device_enabled("llvm"): + assert self.targets_with_explicit_list == ["llvm"] + else: + assert self.targets_with_explicit_list == [] targets_with_exclusion = []