From 56b036c4a606e75b7fa11fdb5dfeacf0688f773f Mon Sep 17 00:00:00 2001 From: Zhi Chen Date: Wed, 6 Mar 2019 22:52:09 +0000 Subject: [PATCH] make resnet and vgg work --- python/tvm/relay/vm.py | 35 ++++++++++++++++++--- src/relay/vm/compiler.cc | 23 +++++++++++--- src/relay/vm/vm.cc | 29 +++++++++-------- tests/python/frontend/mxnet/benchmark_vm.py | 28 +++++++++++------ 4 files changed, 82 insertions(+), 33 deletions(-) diff --git a/python/tvm/relay/vm.py b/python/tvm/relay/vm.py index 92f489438ec0..497fc5a4ff1f 100644 --- a/python/tvm/relay/vm.py +++ b/python/tvm/relay/vm.py @@ -23,7 +23,9 @@ def tag(self): def optimize(expr, mod=None): # TODO: We need to move this optimization code into the optimizer/pass manager ck_expr = ir_pass.infer_type(expr, mod=mod) - fused_expr = ir_pass.fuse_ops(ck_expr, mod=mod) + simplified_expr = ir_pass.simplify_inference(ck_expr) + simplified_expr = ir_pass.infer_type(simplified_expr, mod=mod) + fused_expr = ir_pass.fuse_ops(simplified_expr, mod=mod) ck_fused = ir_pass.infer_type(fused_expr, mod=mod) return ck_fused @@ -64,7 +66,7 @@ def convert(args): return cargs -def eval_vm(mod, ctx, *args): +def eval_vm(mod, ctx, *args, **kwargs): """ Evaluate a module on a given context with the provided arguments. @@ -72,14 +74,19 @@ def eval_vm(mod, ctx, *args): ---------- mod: relay.Module The module to optimize, will execute its entry_func. + ctx: tvm.Context The TVM context to execute on. - args: ... + + args: List[tvm.NDArray, np.ndarray] The arguments to evaluate. + + kwargs: Dict[str, Union[tvm.NDArray, np.ndarray]] + The keyword arguments to evaluate. 
""" main_func = mod[mod.entry_func] - if len(main_func.params) == 0 and isinstance(main_func.body, GlobalVar): + if not main_func.params and isinstance(main_func.body, GlobalVar): main_func = eta_expand(main_func.body, mod) assert isinstance(main_func, Function) @@ -88,6 +95,26 @@ def eval_vm(mod, ctx, *args): args = list(args) assert isinstance(args, list) + + params = main_func.params + if kwargs: + param_names = [param.name_hint for param in params] + arg_count = len(args) + + for i, name in enumerate(param_names): + if i < arg_count: + if kwargs.get(name): + raise Exception("Duplicate argument found in both inputs \ + (at position: {0}) and keyword argument \ + (with name: {1})".format(i, name)) + else: + args.append(kwargs[name]) + + if len(args) != len(params): + raise Exception("Mismatch found between the expected and provided \ + arguments, expected: {0}, provided: \ + {1}".format(len(params), len(args))) + cargs = convert(args) result = _vm._evaluate_vm(mod, ctx.device_type, ctx.device_id, *cargs) diff --git a/src/relay/vm/compiler.cc b/src/relay/vm/compiler.cc index 2fea1d3b921b..6a4c4db2cb34 100644 --- a/src/relay/vm/compiler.cc +++ b/src/relay/vm/compiler.cc @@ -10,11 +10,14 @@ #include #include #include +#include #include "../backend/compile_engine.h" #include "../../runtime/naive_allocator.h" #include #include +#include +#include using namespace tvm::runtime; @@ -82,6 +85,11 @@ struct VMCompiler : ExprFunctor { std::unordered_map var_map; size_t stack_index; CompileEngine engine; + + /*! \brief The functions that have been lowered. */ + std::unordered_map seen_funcs; + + /*! 
\brief Global shared meta data */ VMCompilerContext* context; VMCompiler(VMCompilerContext* context) : @@ -293,8 +301,15 @@ struct VMCompiler : ExprFunctor { auto cfunc = engine->Lower(key); // TODO: support lowered funcs for multiple targets CHECK(cfunc->funcs.size() == 1); - auto op_index = this->context->lowered_funcs.size(); - this->context->lowered_funcs.push_back(cfunc->funcs[0]); + auto op_index = -1; + if (seen_funcs.find(cfunc->funcs[0]) == seen_funcs.end()) { + op_index = this->context->lowered_funcs.size(); + this->context->lowered_funcs.push_back(cfunc->funcs[0]); + seen_funcs[cfunc->funcs[0]] = op_index; + LOG(INFO) << "lowered_funcs: " << cfunc->funcs[0].operator->()->name; + } else { + op_index = seen_funcs[cfunc->funcs[0]]; + } // If Tensor, 1 // If Tuple, size of tuple @@ -486,13 +501,11 @@ void PopulateGlobalMap(GlobalMap* global_map, const Module& mod) { } } -// Verify - VirtualMachine CompileModule(const Module& mod_ref) { Module mod = mod_ref; + // Run some optimizations first, this code should // be moved to pass manager. 
- mod = OptimizeModule(mod); VirtualMachine vm; diff --git a/src/relay/vm/vm.cc b/src/relay/vm/vm.cc index e4d72abf50f8..f2fad4856b47 100644 --- a/src/relay/vm/vm.cc +++ b/src/relay/vm/vm.cc @@ -14,6 +14,7 @@ #include #include +#include using namespace tvm::runtime; @@ -389,16 +390,18 @@ VMObject VirtualMachine::Invoke(const VMFunction& func, const std::vector& args) { +VMObject VirtualMachine::Invoke(const GlobalVar& global, + const std::vector& args) { auto func_index = this->global_map[global]; RELAY_LOG(INFO) << "Invoke Global " << global << " at index " << func_index << std::endl; return Invoke(this->functions[func_index], args); } -void InvokePacked(const PackedFunc& func, size_t arg_count, size_t output_size, std::vector& stack) { +void InvokePacked(const PackedFunc& func, size_t arg_count, size_t output_size, + std::vector& stack) { auto stack_end = stack.size() - 1; - RELAY_LOG(INFO) << "arg_count: " << arg_count; + RELAY_LOG(INFO) << "arg_count: " << arg_count << " output_size: " << output_size; CHECK(arg_count <= stack.size()); std::vector values(arg_count); @@ -406,7 +409,7 @@ void InvokePacked(const PackedFunc& func, size_t arg_count, size_t output_size, runtime::TVMArgsSetter setter(values.data(), codes.data()); auto argument_start = stack.size() - arg_count; - RELAY_LOG(INFO) << "ArgumentStart=" << argument_start << std::endl; + RELAY_LOG(INFO) << "argument_start = " << argument_start << std::endl; for (size_t i = 0; i < arg_count; i++) { NDArray data = ToNDArray(stack[argument_start + i]); setter(i, data); @@ -415,20 +418,10 @@ void InvokePacked(const PackedFunc& func, size_t arg_count, size_t output_size, TVMRetValue rv; func.CallPacked(TVMArgs(values.data(), codes.data(), arg_count), &rv); - // // Fix the object at return value position - // if (output_size == 1) { - // stack[stack.size() - 1] = stack[stack.size() - 2]; - // } else { - // auto adt = std::dynamic_pointer_cast(stack.back().ptr); - // for (size_t i = 0; i < output_size; ++i) { - // 
adt->fields[i] = stack[stack.size() - output_size - 1 + i]; - // } - // } - // We can do this more efficiently by reverse laying out the arguments // and just shrinking the stack. stack[stack.size() - arg_count] = stack[stack_end]; - RELAY_LOG(INFO) << "ShrinkBy=" << arg_count - output_size << std::endl; + RELAY_LOG(INFO) << "ShrinkBy = " << arg_count - output_size << std::endl; stack.resize(stack.size() - (arg_count - output_size)); } @@ -770,7 +763,13 @@ TVM_REGISTER_API("relay._vm._evaluate_vm") vm_args.push_back(obj); } + auto start = std::chrono::high_resolution_clock::now(); auto result = EvaluateModule(module, {ctx}, vm_args); + auto end = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast(end - start) + .count(); + LOG(INFO) << "Inference time: " << duration; RELAY_LOG(INFO) << "Returning results\n"; *ret = VMToValue(std::get<1>(result), std::get<0>(result)); }); diff --git a/tests/python/frontend/mxnet/benchmark_vm.py b/tests/python/frontend/mxnet/benchmark_vm.py index 8c385e0dba8a..a16f1eda5236 100644 --- a/tests/python/frontend/mxnet/benchmark_vm.py +++ b/tests/python/frontend/mxnet/benchmark_vm.py @@ -10,6 +10,7 @@ def benchmark_execution(mx_symbol, + measure=False, data_shape=(1, 3, 224, 224), out_shape=(1, 1000), dtype='float32'): @@ -37,19 +38,26 @@ def get_tvm_output(symbol, x, args, auxs, target, ctx, dtype='float32'): m = graph_runtime.create(graph, lib, ctx) # set inputs - m.set_input("data", tvm.nd.array(x.astype(dtype))) + m.set_input("data", x) m.set_input(**params) m.run() out = m.get_output(0, tvm.nd.empty(out_shape, dtype)) + + if measure: + print("Evaluate graph runtime inference time cost...") + ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=20) + # Measure in millisecond. 
+ prof_res = np.array(ftimer().results) * 1000 + print("Mean inference time (std dev): %.2f ms (%.2f ms)" % + (np.mean(prof_res), np.std(prof_res))) + return out.asnumpy() def get_tvm_vm_output(symbol, x, args, auxs, target, ctx, dtype='float32'): func, params = get_func_param(symbol, x, args, auxs) - params = [params[k] for k in params] - params = [x] + params ex = relay.create_executor('vm', mod=relay.Module(), ctx=ctx) - result = ex.evaluate(func)(*params) - return result.asnumpy() + result = ex.evaluate(func)(x, **params) + return result.asnumpy().astype(dtype) # random input x = np.random.uniform(size=data_shape).astype(dtype) @@ -58,8 +66,10 @@ def get_tvm_vm_output(symbol, x, args, auxs, target, ctx, dtype='float32'): _, args, auxs = get_mxnet_output(mx_symbol, x, dtype) assert "data" not in args - tvm_out = get_tvm_output(mx_symbol, x, args, auxs, target, ctx, dtype) - vm_out = get_tvm_vm_output(mx_symbol, x, args, auxs, target, ctx, dtype) + tvm_out = get_tvm_output(mx_symbol, tvm.nd.array(x.astype(dtype)), args, + auxs, target, ctx, dtype) + vm_out = get_tvm_vm_output(mx_symbol, tvm.nd.array(x.astype(dtype)), args, + auxs, target, ctx, dtype) tvm.testing.assert_allclose(vm_out, tvm_out, rtol=1e-5, atol=1e-5) @@ -126,8 +136,8 @@ def relay_compose(F, **kwargs): if __name__ == '__main__': - test_mlp() - # test_resnet() + # test_mlp() + test_resnet() # test_vgg() # test_multi_outputs() # test_dqn()