From e6b68b45c90ee176a6480dcb1ecbe289bbcb1518 Mon Sep 17 00:00:00 2001 From: hlu1 <14827759+hlu1@users.noreply.github.com> Date: Thu, 23 May 2019 10:13:11 -0700 Subject: [PATCH] [GraphRuntime] Debug graph runtime (#3232) --- python/tvm/contrib/debugger/debug_result.py | 13 ++-- python/tvm/contrib/debugger/debug_runtime.py | 42 ++++++++----- .../graph/debug/graph_runtime_debug.cc | 63 +++++++------------ .../unittest/test_runtime_graph_debug.py | 4 -- 4 files changed, 55 insertions(+), 67 deletions(-) diff --git a/python/tvm/contrib/debugger/debug_result.py b/python/tvm/contrib/debugger/debug_result.py index c53a2c287339..882364dd3971 100644 --- a/python/tvm/contrib/debugger/debug_result.py +++ b/python/tvm/contrib/debugger/debug_result.py @@ -207,10 +207,8 @@ def dump_graph_json(self, graph): def display_debug_result(self): """Displays the debugger result" """ - header = ["Node Name", "Ops", "Time(us)", "Time(%)", "Start Time", \ - "End Time", "Shape", "Inputs", "Outputs"] - lines = ["---------", "---", "--------", "-------", "----------", \ - "--------", "-----", "------", "-------"] + header = ["Node Name", "Ops", "Time(us)", "Time(%)", "Shape", "Inputs", "Outputs"] + lines = ["---------", "---", "--------", "-------", "-----", "------", "-------"] eid = 0 data = [] total_time = sum(time[0] for time in self._time_list) @@ -223,12 +221,11 @@ def display_debug_result(self): continue name = node['name'] shape = str(self._output_tensor_list[eid].shape) - time_us = round(time[0] * 1000000, 2) - time_percent = round(((time[0] / total_time) * 100), 2) + time_us = round(time[0] * 1000000, 3) + time_percent = round(((time[0] / total_time) * 100), 3) inputs = str(node['attrs']['num_inputs']) outputs = str(node['attrs']['num_outputs']) - node_data = [name, op, time_us, time_percent, str(time[1]), str(time[2]), \ - shape, inputs, outputs] + node_data = [name, op, time_us, time_percent, shape, inputs, outputs] data.append(node_data) eid += 1 fmt = "" diff --git a/python/tvm/contrib/debugger/debug_runtime.py b/python/tvm/contrib/debugger/debug_runtime.py index 01cda35769a5..f77a927eeabf 100644 --- a/python/tvm/contrib/debugger/debug_runtime.py +++ b/python/tvm/contrib/debugger/debug_runtime.py @@ -19,7 +19,6 @@ import os import tempfile import shutil -from datetime import datetime from tvm._ffi.base import string_types from tvm._ffi.function import get_global_func from tvm.contrib import graph_runtime @@ -30,6 +29,7 @@ _DUMP_ROOT_PREFIX = "tvmdbg_" _DUMP_PATH_PREFIX = "_tvmdbg_" + def create(graph_json_str, libmod, ctx, dump_root=None): """Create a runtime executor module given a graph and module. @@ -62,17 +62,23 @@ def create(graph_json_str, libmod, ctx, dump_root=None): try: fcreate = get_global_func("tvm.graph_runtime_debug.create") except ValueError: - raise ValueError("Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in " \ - "config.cmake and rebuild TVM to enable debug mode") + raise ValueError( + "Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in " + "config.cmake and rebuild TVM to enable debug mode" + ) ctx, num_rpc_ctx, device_type_id = graph_runtime.get_device_ctx(libmod, ctx) if num_rpc_ctx == len(ctx): libmod = rpc_base._ModuleHandle(libmod) try: - fcreate = ctx[0]._rpc_sess.get_function("tvm.graph_runtime_debug.remote_create") + fcreate = ctx[0]._rpc_sess.get_function( + "tvm.graph_runtime_debug.remote_create" + ) except ValueError: - raise ValueError("Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in " \ - "config.cmake and rebuild TVM to enable debug mode") + raise ValueError( + "Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in " + "config.cmake and rebuild TVM to enable debug mode" + ) func_obj = fcreate(graph_json_str, libmod, *device_type_id) return GraphModuleDebug(func_obj, ctx, graph_json_str, dump_root) @@ -100,10 +106,10 @@ class GraphModuleDebug(graph_runtime.GraphModule): To select which folder the outputs should be kept. None will make a temp folder in /tmp/tvmdbg and does the dumping """ + def __init__(self, module, ctx, graph_json_str, dump_root): self._dump_root = dump_root self._dump_path = None - self._debug_run = module["debug_run"] self._get_output_by_layer = module["get_output_by_layer"] self._run_individual = module["run_individual"] graph_runtime.GraphModule.__init__(self, module) @@ -181,13 +187,10 @@ def _run_debug(self): Time consumed for each execution will be set as debug output. """ - self.debug_datum._time_list = [] - + self.debug_datum._time_list = [ + [float(t) * 1e-6] for t in self.run_individual(10, 1, 1) + ] for i, node in enumerate(self.debug_datum.get_graph_nodes()): - start_time = datetime.now().time() - time_stamp = self._debug_run(i) - end_time = datetime.now().time() - self.debug_datum._time_list.append([time_stamp, start_time, end_time]) num_outputs = self.debug_datum.get_graph_node_output_num(node) for j in range(num_outputs): out_tensor = self._get_output_by_layer(i, j) @@ -212,8 +215,13 @@ def debug_get_output(self, node, out): ret = output_tensors[node] except: node_list = output_tensors.keys() - raise RuntimeError("Node " + node + " not found, available nodes are: " - + str(node_list) + ".") + raise RuntimeError( + "Node " + + node + + " not found, available nodes are: " + + str(node_list) + + "." + ) elif isinstance(node, int): output_tensors = self.debug_datum._output_tensor_list ret = output_tensors[node] @@ -242,7 +250,9 @@ def run(self, **input_dict): self.debug_datum.display_debug_result() def run_individual(self, number, repeat=1, min_repeat_ms=0): - self._run_individual(number, repeat, min_repeat_ms) + ret = self._run_individual(number, repeat, min_repeat_ms) + return ret.strip(",").split(",") if ret else [] + def exit(self): """Exits the dump folder and all its contents""" diff --git a/src/runtime/graph/debug/graph_runtime_debug.cc b/src/runtime/graph/debug/graph_runtime_debug.cc index 560bf3da238e..2b26ae541b5f 100644 --- a/src/runtime/graph/debug/graph_runtime_debug.cc +++ b/src/runtime/graph/debug/graph_runtime_debug.cc @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -24,7 +24,9 @@ #include #include #include + #include +#include #include "../graph_runtime.h" namespace tvm { @@ -39,40 +41,23 @@ namespace runtime { class GraphRuntimeDebug : public GraphRuntime { public: /*! - * \brief Run each operation and get the output. - * \param index The index of op which needs to be run. - * \return the elapsed time. - */ - double DebugRun(size_t index) { - CHECK(index < op_execs_.size()); - TVMContext ctx = data_entry_[entry_id(index, 0)]->ctx; - auto tbegin = std::chrono::high_resolution_clock::now(); - if (op_execs_[index]) { - op_execs_[index](); - } - TVMSynchronize(ctx.device_type, ctx.device_id, nullptr); - auto tend = std::chrono::high_resolution_clock::now(); - double time = std::chrono::duration_cast >( - tend - tbegin).count(); - return time; - } - - /*! - * \brief Run each operation in the graph and print out the runtime per op. + * \brief Run each operation in the graph and get the time per op for all ops. * \param number The number of times to run this function for taking average. * \param repeat The number of times to repeat the measurement. - In total, the function will be invoked (1 + number x repeat) times, - where the first one is warmed up and will be discarded in case - there is lazy initialization. + * In total, the function will be invoked (1 + number x repeat) times, + * where the first one is warmed up and will be discarded in case + * there is lazy initialization. * \param min_repeat_ms The minimum duration of one `repeat` in milliseconds. - By default, one `repeat` contains `number` runs. If this parameter is set, - the parameters `number` will be dynamically adjusted to meet the - minimum duration requirement of one `repeat`. + * By default, one `repeat` contains `number` runs. If this parameter is set, + * the parameters `number` will be dynamically adjusted to meet the + * minimum duration requirement of one `repeat`. + * \return Comma seperated string containing the elapsed time per op for the last + * iteration only, because returning a long string over rpc can be expensive. */ - void RunIndividual(int number, int repeat, int min_repeat_ms) { + std::string RunIndividual(int number, int repeat, int min_repeat_ms) { // warmup run GraphRuntime::Run(); - + std::ostringstream os; std::vector time_per_op(op_execs_.size(), 0); for (int i = 0; i < repeat; ++i) { std::chrono::time_point< @@ -96,7 +81,7 @@ class GraphRuntimeDebug : public GraphRuntime { auto op_tend = std::chrono::high_resolution_clock::now(); double op_duration = std::chrono::duration_cast< std::chrono::duration >(op_tend - op_tbegin).count(); - time_per_op[index] += op_duration * 1000; // ms + time_per_op[index] += op_duration * 1e6; // us } } } @@ -105,16 +90,20 @@ class GraphRuntimeDebug : public GraphRuntime { (tend - tbegin).count() * 1000; } while (duration_ms < min_repeat_ms); - LOG(INFO) << "Repeat: " << i; + LOG(INFO) << "Iteration: " << i; int op = 0; for (size_t index = 0; index < time_per_op.size(); index++) { if (op_execs_[index]) { time_per_op[index] /= number; LOG(INFO) << "Op #" << op++ << " " << GetNodeName(index) << ": " - << time_per_op[index] << " ms/iter"; + << time_per_op[index] << " us/iter"; } } } + for (size_t index = 0; index < time_per_op.size(); index++) { + os << time_per_op[index] << ","; + } + return os.str(); } /*! @@ -182,11 +171,7 @@ PackedFunc GraphRuntimeDebug::GetFunction( const std::string& name, const std::shared_ptr& sptr_to_self) { // return member functions during query. - if (name == "debug_run") { - return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - *rv = this->DebugRun(static_cast(args[0].operator int64_t())); - }); - } else if (name == "get_output_by_layer") { + if (name == "get_output_by_layer") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->GetOutputByLayer(args[0], args[1]); }); @@ -206,7 +191,7 @@ PackedFunc GraphRuntimeDebug::GetFunction( CHECK_GT(number, 0); CHECK_GT(repeat, 0); CHECK_GE(min_repeat_ms, 0); - this->RunIndividual(number, repeat, min_repeat_ms); + *rv = this->RunIndividual(number, repeat, min_repeat_ms); }); } else { return GraphRuntime::GetFunction(name, sptr_to_self); diff --git a/tests/python/unittest/test_runtime_graph_debug.py b/tests/python/unittest/test_runtime_graph_debug.py index 3de270732403..717b23c22689 100644 --- a/tests/python/unittest/test_runtime_graph_debug.py +++ b/tests/python/unittest/test_runtime_graph_debug.py @@ -100,9 +100,6 @@ def check_verify(): out = mod.get_output(0, tvm.nd.empty((n,))) np.testing.assert_equal(out.asnumpy(), a + 1) - #test individual run - mod.run_individual(20, 2, 1) - mod.exit() #verify dump root delete after cleanup assert(not os.path.exists(directory)) @@ -129,7 +126,6 @@ def check_remote(): mod.run(x=tvm.nd.array(a, ctx)) out = tvm.nd.empty((n,), ctx=ctx) out = mod.get_output(0, out) - mod.run_individual(20, 2, 1) np.testing.assert_equal(out.asnumpy(), a + 1) check_verify()