From e74a0eb6111c147673e3cd73246ffd20dfc92615 Mon Sep 17 00:00:00 2001
From: Zhi <5145158+zhiics@users.noreply.github.com>
Date: Thu, 24 Oct 2019 09:56:58 -0700
Subject: [PATCH] [relay] use time_evaluator for measurement (#4191)

---
 .../python/relay/benchmarking/benchmark_vm.py | 65 +++++++++++++------
 1 file changed, 45 insertions(+), 20 deletions(-)

diff --git a/tests/python/relay/benchmarking/benchmark_vm.py b/tests/python/relay/benchmarking/benchmark_vm.py
index b1d8b9cb9130..cfb3fd42f834 100644
--- a/tests/python/relay/benchmarking/benchmark_vm.py
+++ b/tests/python/relay/benchmarking/benchmark_vm.py
@@ -21,16 +21,20 @@
 from tvm.contrib import graph_runtime
 from tvm import relay
 from tvm.relay import testing
+from tvm.relay import vm
+from tvm.relay import vmobj as _obj
 
 
 def benchmark_execution(mod,
                         params,
-                        measure=False,
+                        measure=True,
                         data_shape=(1, 3, 224, 224),
                         out_shape=(1, 1000),
-                        dtype='float32'):
-    def get_tvm_output(mod, data, params, target, ctx, dtype='float32'):
-        with relay.build_config(opt_level=1):
+                        dtype='float32',
+                        model="unknown"):
+    def get_graph_runtime_output(mod, data, params, target, ctx,
+                                 dtype='float32', number=2, repeat=20):
+        with relay.build_config(opt_level=3):
             graph, lib, params = relay.build(mod, target, params=params)
 
         m = graph_runtime.create(graph, lib, ctx)
@@ -41,18 +45,34 @@ def get_tvm_output(mod, data, params, target, ctx, dtype='float32'):
         out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
 
         if measure:
-            print("Evaluate graph runtime inference time cost...")
+            print("Evaluate graph runtime inference cost of {} on "
+                  "{}".format(model, repr(ctx)))
             ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=20)
             # Measure in millisecond.
             prof_res = np.array(ftimer().results) * 1000
-            print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
+            print("Mean graph runtime inference time (std dev): %.2f ms (%.2f ms)" %
                   (np.mean(prof_res), np.std(prof_res)))
 
         return out.asnumpy()
 
-    def get_tvm_vm_output(mod, data, params, target, ctx, dtype='float32'):
-        ex = relay.create_executor('vm', mod=mod, ctx=ctx)
-        result = ex.evaluate()(data, **params)
+    def get_vm_output(mod, data, params, target, ctx, dtype='float32',
+                      number=2, repeat=20):
+        with relay.build_config(opt_level=3):
+            exe = vm.compile(mod, target, params=params)
+            rly_vm = vm.VirtualMachine(exe)
+            rly_vm.init(ctx)
+            result = rly_vm.run(data)
+
+        if measure:
+            print("Evaluate vm inference cost of {} on {}".format(model,
+                                                                  repr(ctx)))
+            ftimer = rly_vm.mod.time_evaluator("invoke", ctx, number=number,
+                                               repeat=repeat)
+            # Measure in millisecond.
+            prof_res = np.array(ftimer("main", _obj.Tensor(data)).results) * 1000
+            print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
+                  (np.mean(prof_res), np.std(prof_res)))
+
         return result.asnumpy().astype(dtype)
 
     # random input
@@ -60,41 +80,46 @@ def get_tvm_vm_output(mod, data, params, target, ctx, dtype='float32'):
     target = "llvm"
     ctx = tvm.cpu(0)
 
-    tvm_out = get_tvm_output(mod, tvm.nd.array(data.astype(dtype)), params,
-                             target, ctx, dtype)
-    vm_out = get_tvm_vm_output(mod, tvm.nd.array(data.astype(dtype)), params,
-                               target, ctx, dtype)
+    tvm_out = get_graph_runtime_output(mod, tvm.nd.array(data.astype(dtype)),
+                                       params, target, ctx, dtype)
+    vm_out = get_vm_output(mod, tvm.nd.array(data.astype(dtype)), params,
+                           target, ctx, dtype)
     tvm.testing.assert_allclose(vm_out, tvm_out, rtol=1e-5, atol=1e-5)
 
 
 def test_mlp():
     image_shape = (1, 1, 28, 28)
     mod, params = testing.mlp.get_workload(1)
-    benchmark_execution(mod, params, data_shape=image_shape, out_shape=(1, 10))
+    benchmark_execution(mod, params, data_shape=image_shape, out_shape=(1, 10),
+                        model="mlp")
 
 
 def test_vgg():
     for n in [11, 16]:
         mod, params = testing.vgg.get_workload(1, num_layers=n)
-        benchmark_execution(mod, params)
+        model = "vgg" + str(n)
+        benchmark_execution(mod, params, model=model)
 
 
 def test_resnet():
     for n in [18, 50]:
         mod, params = testing.resnet.get_workload(batch_size=1, num_layers=n)
-        benchmark_execution(mod, params, True)
+        model = "resnet" + str(n)
+        benchmark_execution(mod, params, model=model)
 
 
 def test_squeezenet():
     for version in ['1.0', '1.1']:
         mod, params = testing.squeezenet.get_workload(version=version)
-        benchmark_execution(mod, params)
+        model = "squeezenet" + version
+        benchmark_execution(mod, params, model=model)
 
 
 def test_inception_v3():
     image_shape = (3, 299, 299)
     mod, params = testing.inception_v3.get_workload(image_shape=image_shape)
-    benchmark_execution(mod, params, data_shape=(1, 3, 299, 299))
+    benchmark_execution(mod, params, data_shape=(1, 3, 299, 299),
+                        model="inception_v3")
 
 
 def test_dqn():
@@ -112,7 +137,7 @@ def test_dcgan():
 def test_mobilenet():
     mod, params = testing.mobilenet.get_workload(batch_size=1)
-    benchmark_execution(mod, params)
+    benchmark_execution(mod, params, model="mobilenet")
 
 
 # TODO: enable when the low building performance (several minutes) fixed.
 def test_mobilenet_nhwc():
@@ -124,7 +149,7 @@ def test_mobilenet_nhwc():
 
 def test_densenet():
     mod, params = testing.densenet.get_workload(batch_size=1)
-    benchmark_execution(mod, params)
+    benchmark_execution(mod, params, model="densenet")
 
 
 if __name__ == '__main__':
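
For reference, below is a minimal, self-contained sketch (not part of the commit) of the
time_evaluator pattern this patch adopts on the graph-runtime path. The workload and input
shape mirror test_mlp above; the number/repeat values are illustrative, matching the new
defaults in the patch. Each entry in ftimer().results is the mean wall-clock time in
seconds over `number` consecutive runs, collected `repeat` times, hence the * 1000 to
report milliseconds.

# Standalone sketch of the graph-runtime measurement pattern used in this
# patch. Workload and shapes mirror test_mlp; number/repeat are illustrative.
import numpy as np
import tvm
from tvm import relay
from tvm.contrib import graph_runtime
from tvm.relay import testing

mod, params = testing.mlp.get_workload(1)
target, ctx = "llvm", tvm.cpu(0)

with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(mod, target, params=params)

m = graph_runtime.create(graph, lib, ctx)
m.set_input(**params)
m.set_input('data', tvm.nd.array(
    np.random.uniform(size=(1, 1, 28, 28)).astype('float32')))

# Each of the `repeat` entries in ftimer().results is the average runtime
# (in seconds) over `number` consecutive runs; scale to milliseconds.
ftimer = m.module.time_evaluator("run", ctx, number=2, repeat=20)
prof_res = np.array(ftimer().results) * 1000
print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
      (np.mean(prof_res), np.std(prof_res)))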