Commit ef7558c: Apply comments

echuraev committed Aug 12, 2022
1 parent 8067b1c commit ef7558c
Showing 11 changed files with 95 additions and 81 deletions.
8 changes: 4 additions & 4 deletions include/tvm/runtime/profiling.h
@@ -573,8 +573,8 @@ PackedFunc ProfileFunction(Module mod, std::string func_name, int device_type, i
* minimum duration requirement of one `repeat`.
* i.e., When the run time of one `repeat` falls below this time,
* the `number` parameter will be automatically increased.
* \param max_repeat_ms The maximum number of repeats when measured time is equal to 0.
* It helps to avoid hanging during measurements.
* \param limit_zero_time_iterations The maximum number of repeats when
* measured time is equal to 0. It helps to avoid hanging during measurements.
* \param cooldown_interval_ms The cooldown interval in milliseconds between the number of repeats
* defined by `repeats_to_cooldown`.
* \param repeats_to_cooldown The number of repeats before the
@@ -584,8 +584,8 @@ PackedFunc ProfileFunction(Module mod, std::string func_name, int device_type, i
* \return f_timer A timer function.
*/
PackedFunc WrapTimeEvaluator(PackedFunc f, Device dev, int number, int repeat, int min_repeat_ms,
int max_repeat_num, int cooldown_interval_ms, int repeats_to_cooldown,
PackedFunc f_preproc = nullptr);
int limit_zero_time_iterations, int cooldown_interval_ms,
int repeats_to_cooldown, PackedFunc f_preproc = nullptr);

} // namespace profiling
} // namespace runtime
27 changes: 16 additions & 11 deletions python/tvm/contrib/debugger/debug_executor.py
@@ -228,7 +228,7 @@ def _run_debug(
number,
repeat,
min_repeat_ms,
max_repeat_num,
limit_zero_time_iterations,
cooldown_interval_ms,
repeats_to_cooldown,
):
@@ -241,7 +241,7 @@ def _run_debug(
number=number,
repeat=repeat,
min_repeat_ms=min_repeat_ms,
max_repeat_num=max_repeat_num,
limit_zero_time_iterations=limit_zero_time_iterations,
cooldown_interval_ms=cooldown_interval_ms,
repeats_to_cooldown=repeats_to_cooldown,
)
@@ -281,7 +281,7 @@ def run(
number=10,
repeat=1,
min_repeat_ms=1,
max_repeat_num=100,
limit_zero_time_iterations=100,
cooldown_interval_ms=0,
repeats_to_cooldown=1,
**input_dict,
@@ -309,7 +309,7 @@ def run(
i.e., When the run time of one `repeat` falls below this time, the `number` parameter
will be automatically increased.
max_repeat_num: int, optional
limit_zero_time_iterations: int, optional
The maximum number of repeats when measured time is equal to 0.
It helps to avoid hanging during measurements.
@@ -331,7 +331,7 @@ def run(
number=number,
repeat=repeat,
min_repeat_ms=min_repeat_ms,
max_repeat_num=max_repeat_num,
limit_zero_time_iterations=limit_zero_time_iterations,
cooldown_interval_ms=cooldown_interval_ms,
repeats_to_cooldown=repeats_to_cooldown,
)
@@ -347,7 +347,7 @@ def run_individual(
number,
repeat=1,
min_repeat_ms=0,
max_repeat_num=100,
limit_zero_time_iterations=100,
cooldown_interval_ms=0,
repeats_to_cooldown=1,
):
@@ -372,7 +372,7 @@ def run_individual(
i.e., When the run time of one `repeat` falls below this time, the `number` parameter
will be automatically increased.
max_repeat_num: int, optional
limit_zero_time_iterations: int, optional
The maximum number of repeats when measured time is equal to 0.
It helps to avoid hanging during measurements.
@@ -389,7 +389,12 @@ def run_individual(
the repeat of the measurement.
"""
res = self._run_individual(
number, repeat, min_repeat_ms, max_repeat_num, cooldown_interval_ms, repeats_to_cooldown
number,
repeat,
min_repeat_ms,
limit_zero_time_iterations,
cooldown_interval_ms,
repeats_to_cooldown,
)
results = []
offset = 0
@@ -409,7 +414,7 @@ def run_individual_node(
number=10,
repeat=1,
min_repeat_ms=0,
max_repeat_num=100,
limit_zero_time_iterations=100,
cooldown_interval_ms=0,
repeats_to_cooldown=1,
):
@@ -441,7 +446,7 @@ def run_individual_node(
i.e., When the run time of one `repeat` falls below this time, the `number` parameter
will be automatically increased.
max_repeat_num: int, optional
limit_zero_time_iterations: int, optional
The maximum number of repeats when measured time is equal to 0.
It helps to avoid hanging during measurements.
@@ -462,7 +467,7 @@ def run_individual_node(
number,
repeat,
min_repeat_ms,
max_repeat_num,
limit_zero_time_iterations,
cooldown_interval_ms,
repeats_to_cooldown,
)
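For context, a minimal usage sketch of the renamed keyword on the debug executor follows. It is not part of this commit; the workload, input name, and random data are illustrative assumptions, and only the `run_individual` keyword arguments mirror the signature shown above.

# Illustrative sketch, not part of this diff: per-operator timing with the
# debug executor, using the renamed `limit_zero_time_iterations` keyword.
import numpy as np
import tvm
from tvm import relay
from tvm.contrib.debugger import debug_executor
from tvm.relay import testing

# Assumed workload: a small MLP from the Relay testing helpers.
mod, params = testing.mlp.get_workload(batch_size=1)
lib = relay.build(mod, target="llvm", params=params)
dev = tvm.cpu(0)

gmod = debug_executor.create(lib.get_graph_json(), lib.get_lib(), dev)
gmod.set_input("data", np.random.rand(1, 1, 28, 28).astype("float32"))
gmod.set_input(**lib.get_params())

# `limit_zero_time_iterations` replaces the old `max_repeat_num` argument and
# caps how often a zero-time measurement is retried before giving up.
per_op_times = gmod.run_individual(
    number=10,
    repeat=3,
    min_repeat_ms=10,
    limit_zero_time_iterations=100,
    cooldown_interval_ms=0,
    repeats_to_cooldown=1,
)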
8 changes: 4 additions & 4 deletions python/tvm/contrib/graph_executor.py
@@ -355,7 +355,7 @@ def benchmark(
repeat=5,
number=5,
min_repeat_ms=None,
max_repeat_num=100,
limit_zero_time_iterations=100,
end_to_end=False,
cooldown_interval_ms=0,
repeats_to_cooldown=1,
@@ -403,7 +403,7 @@ def benchmark(
milliseconds. This can be used to ensure that the function is run enough to get an
accurate measurement.
max_repeat_num : Optional[int]
limit_zero_time_iterations : Optional[int]
The maximum number of repeats when measured time is equal to 0.
It helps to avoid hanging during measurements.
@@ -442,7 +442,7 @@ def benchmark(
repeat=repeat,
number=number,
min_repeat_ms=min_repeat_ms,
max_repeat_num=max_repeat_num,
limit_zero_time_iterations=limit_zero_time_iterations,
)(device.device_type % rpc_base.RPC_SESS_MASK, device.device_id, *args)
if kwargs:
self.set_input(**kwargs)
@@ -452,7 +452,7 @@ def benchmark(
repeat=repeat,
number=number,
min_repeat_ms=min_repeat_ms,
max_repeat_num=max_repeat_num,
limit_zero_time_iterations=limit_zero_time_iterations,
cooldown_interval_ms=cooldown_interval_ms,
repeats_to_cooldown=repeats_to_cooldown,
)()
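A hedged end-to-end sketch of the updated `benchmark` keyword follows. It is not part of this commit; the workload is an assumed Relay testing model, and only the keyword names come from the signature above.

# Illustrative sketch, not part of this diff: whole-model benchmarking with the
# graph executor and the renamed keyword.
import tvm
from tvm import relay
from tvm.contrib import graph_executor
from tvm.relay import testing

mod, params = testing.mlp.get_workload(batch_size=1)
lib = relay.build(mod, target="llvm", params=params)
dev = tvm.cpu(0)
module = graph_executor.GraphModule(lib["default"](dev))

# Caps zero-time retries so a broken timer cannot hang the benchmark loop.
result = module.benchmark(
    dev,
    repeat=5,
    number=5,
    limit_zero_time_iterations=100,
    cooldown_interval_ms=0,
    repeats_to_cooldown=1,
)
print(result)  # BenchmarkResult with mean/median/std over the repeats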
6 changes: 3 additions & 3 deletions python/tvm/runtime/module.py
@@ -277,7 +277,7 @@ def time_evaluator(
number=10,
repeat=1,
min_repeat_ms=0,
max_repeat_num=100,
limit_zero_time_iterations=100,
cooldown_interval_ms=0,
repeats_to_cooldown=1,
f_preproc="",
@@ -311,7 +311,7 @@ def time_evaluator(
i.e., When the run time of one `repeat` falls below this time, the `number` parameter
will be automatically increased.
max_repeat_num: int, optional
limit_zero_time_iterations: int, optional
The maximum number of repeats when measured time is equal to 0.
It helps to avoid hanging during measurements.
@@ -345,7 +345,7 @@ def time_evaluator(
number,
repeat,
min_repeat_ms,
max_repeat_num,
limit_zero_time_iterations,
cooldown_interval_ms,
repeats_to_cooldown,
f_preproc,
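For reference, a hedged sketch of calling `time_evaluator` with the renamed argument; it is not part of this commit, and the toy schedule below is an assumption made only for illustration.

# Illustrative sketch, not part of this diff: timing a trivial compiled kernel.
import numpy as np
import tvm
from tvm import te

n = 1024
A = te.placeholder((n,), name="A")
B = te.compute((n,), lambda i: A[i] + 1.0, name="B")
func = tvm.build(te.create_schedule(B.op), [A, B], target="llvm")

dev = tvm.cpu(0)
a = tvm.nd.array(np.random.rand(n).astype("float32"), dev)
b = tvm.nd.array(np.zeros(n, dtype="float32"), dev)

# `limit_zero_time_iterations` bounds the retries performed when a run is
# measured as taking zero time; the other knobs keep their previous meaning.
timer = func.time_evaluator(
    func.entry_name,
    dev,
    number=10,
    repeat=3,
    min_repeat_ms=10,
    limit_zero_time_iterations=100,
)
print(timer(a, b).mean)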
8 changes: 4 additions & 4 deletions python/tvm/runtime/vm.py
@@ -583,7 +583,7 @@ def benchmark(
repeat=5,
number=5,
min_repeat_ms=None,
max_repeat_num=100,
limit_zero_time_iterations=100,
end_to_end=False,
cooldown_interval_ms=0,
repeats_to_cooldown=1,
@@ -631,7 +631,7 @@ def benchmark(
milliseconds. This can be used to ensure that the function is run enough to get an
accurate measurement.
max_repeat_num : Optional[int]
limit_zero_time_iterations : Optional[int]
The maximum number of repeats when measured time is equal to 0.
It helps to avoid hanging during measurements.
@@ -677,7 +677,7 @@ def benchmark(
repeat=repeat,
number=number,
min_repeat_ms=min_repeat_ms,
max_repeat_num=max_repeat_num,
limit_zero_time_iterations=limit_zero_time_iterations,
)(func_name, device.device_type % RPC_SESS_MASK, device.device_id, *packed_args)
if args or kwargs:
self.set_input(func_name, *args, **kwargs)
@@ -687,7 +687,7 @@ def benchmark(
repeat=repeat,
number=number,
min_repeat_ms=min_repeat_ms,
max_repeat_num=max_repeat_num,
limit_zero_time_iterations=limit_zero_time_iterations,
cooldown_interval_ms=cooldown_interval_ms,
repeats_to_cooldown=repeats_to_cooldown,
)(func_name)
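The VM path takes the same keyword; a hedged sketch follows (not part of this commit; the workload and input are assumptions, only the `benchmark` keywords mirror the signature above).

# Illustrative sketch, not part of this diff: benchmarking through the Relay VM.
import numpy as np
import tvm
from tvm import relay
from tvm.relay import testing
from tvm.runtime.vm import VirtualMachine

mod, params = testing.mlp.get_workload(batch_size=1)
dev = tvm.cpu(0)
exe = relay.vm.compile(mod, target="llvm", params=params)
vm = VirtualMachine(exe, dev)

data = tvm.nd.array(np.random.rand(1, 1, 28, 28).astype("float32"), dev)
result = vm.benchmark(
    dev,
    data,
    func_name="main",
    repeat=5,
    number=5,
    limit_zero_time_iterations=100,
    end_to_end=False,
)
print(result)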
9 changes: 5 additions & 4 deletions src/runtime/crt/common/crt_runtime_api.c
@@ -21,6 +21,7 @@

#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
@@ -477,7 +478,7 @@ typedef struct {
int number;
int repeat;
int min_repeat_ms;
int max_repeat_num;
int limit_zero_time_iterations;
int cooldown_interval_ms;
int repeats_to_cooldown;
} time_evaluator_state_t;
@@ -507,7 +508,7 @@ int RPCTimeEvaluator(TVMValue* args, int* type_codes, int num_args, TVMValue* re
g_time_evaluator_state.number = args[4].v_int64;
g_time_evaluator_state.repeat = args[5].v_int64;
g_time_evaluator_state.min_repeat_ms = args[6].v_int64;
g_time_evaluator_state.min_repeat_num = args[7].v_int64;
g_time_evaluator_state.limit_zero_time_iterations = args[7].v_int64;
g_time_evaluator_state.cooldown_interval_ms = args[8].v_int64;
g_time_evaluator_state.repeats_to_cooldown = args[9].v_int64;

@@ -592,8 +593,8 @@ tvm_crt_error_t RunTimeEvaluator(tvm_function_index_t function_index, TVMValue*
goto release_and_return;
}
    if (fpclassify(curr_res_seconds) == FP_ZERO) absolute_zero_times++;
if (absolute_zero_times >= max_repeat_num) break;
} while (curr_res_seconds < min_repeat_seconds);
} while (curr_res_seconds < min_repeat_seconds &&
absolute_zero_times < g_time_evaluator_state.limit_zero_time_iterations);
double mean_exec_seconds = curr_res_seconds / g_time_evaluator_state.number;
*iter = mean_exec_seconds;
iter++;
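To make the control flow above easier to follow, here is a hedged pure-Python rendering of the measurement loop. It is a sketch of the idea only, not TVM source; in particular, the way `number` grows between iterations is simplified to a doubling.

# Illustrative pseudocode, not part of this diff: how limit_zero_time_iterations
# bounds the loop when the timer keeps reading zero.
import time

def measure(f, number):
    start = time.perf_counter()
    for _ in range(number):
        f()
    return time.perf_counter() - start

def one_repeat(f, number, min_repeat_ms, limit_zero_time_iterations):
    min_repeat_seconds = min_repeat_ms / 1000.0
    absolute_zero_times = 0
    while True:
        curr_res_seconds = measure(f, number)
        if curr_res_seconds == 0.0:
            absolute_zero_times += 1
        # Old behaviour: break once absolute_zero_times >= max_repeat_num.
        # New behaviour: the zero-time cap is folded into the loop condition.
        if not (curr_res_seconds < min_repeat_seconds
                and absolute_zero_times < limit_zero_time_iterations):
            return curr_res_seconds / number  # mean seconds per call
        number *= 2  # the real evaluator grows `number` to reach min_repeat_ms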
45 changes: 25 additions & 20 deletions src/runtime/graph_executor/debug/graph_executor_debug.cc
@@ -56,8 +56,9 @@ class GraphExecutorDebug : public GraphExecutor {
* By default, one `repeat` contains `number` runs. If this parameter is set,
* the parameters `number` will be dynamically adjusted to meet the
* minimum duration requirement of one `repeat`.
* \param max_repeat_ms The maximum number of repeats when measured time is equal to 0.
* It helps to avoid hanging during measurements.
* \param limit_zero_time_iterations The maximum number of repeats when
* measured time is equal to 0. It helps to avoid hanging during
* measurements.
* \param cooldown_interval_ms The cooldown interval in milliseconds between the number of repeats
* defined by `repeats_to_cooldown`.
* \param repeats_to_cooldown The number of repeats before the
@@ -66,23 +67,25 @@ class GraphExecutorDebug : public GraphExecutor {
* representing the number of layers. Next the encoded real numbers are float32_t in the number of
* repeat multiplied by the number of layers.
*/
std::string RunIndividual(int number, int repeat, int min_repeat_ms, int max_repeat_num,
int cooldown_interval_ms, int repeats_to_cooldown) {
std::string RunIndividual(int number, int repeat, int min_repeat_ms,
int limit_zero_time_iterations, int cooldown_interval_ms,
int repeats_to_cooldown) {
// warmup run
GraphExecutor::Run();
std::string tkey = module_->type_key();
std::vector<std::vector<double>> time_sec_per_op(op_execs_.size());
if (tkey == "rpc") {
// RPC modules rely on remote timing which implements the logic from the else branch.
for (size_t index = 0; index < op_execs_.size(); ++index) {
time_sec_per_op[index] = RunOpRPC(index, number, repeat, min_repeat_ms, max_repeat_num,
cooldown_interval_ms, repeats_to_cooldown);
time_sec_per_op[index] =
RunOpRPC(index, number, repeat, min_repeat_ms, limit_zero_time_iterations,
cooldown_interval_ms, repeats_to_cooldown);
}
} else {
int op = 0;
for (size_t index = 0; index < op_execs_.size(); ++index) {
std::string result_str =
RunIndividualNode(index, number, repeat, min_repeat_ms, max_repeat_num,
RunIndividualNode(index, number, repeat, min_repeat_ms, limit_zero_time_iterations,
cooldown_interval_ms, repeats_to_cooldown);
const double* blob_ptr = reinterpret_cast<const double*>(result_str.data());
for (int i = 0; i < repeat; ++i, ++blob_ptr) {
@@ -113,7 +116,7 @@ class GraphExecutorDebug : public GraphExecutor {
}

std::string RunIndividualNode(int node_index, int number, int repeat, int min_repeat_ms,
int max_repeat_num, int cooldown_interval_ms,
int limit_zero_time_iterations, int cooldown_interval_ms,
int repeats_to_cooldown) {
std::string tkey = module_->type_key();

@@ -135,12 +138,13 @@ class GraphExecutorDebug : public GraphExecutor {
Device& d = devices_[0];
PackedFunc time_evaluator = profiling::WrapTimeEvaluator(
TypedPackedFunc<void()>([this, node_index]() { this->RunOpHost(node_index); }), d, number,
repeat, min_repeat_ms, max_repeat_num, cooldown_interval_ms, repeats_to_cooldown);
repeat, min_repeat_ms, limit_zero_time_iterations, cooldown_interval_ms,
repeats_to_cooldown);
return time_evaluator();
}

std::vector<double> RunOpRPC(int index, int number, int repeat, int min_repeat_ms,
int max_repeat_num, int cooldown_interval_ms,
int limit_zero_time_iterations, int cooldown_interval_ms,
int repeats_to_cooldown) {
std::vector<double> results(repeat, 0);
// Right now we expect either "tvm_op" for nodes which run PackedFunc or "null" for nodes
@@ -167,7 +171,7 @@ class GraphExecutorDebug : public GraphExecutor {
runtime::Registry::Get("runtime.RPCTimeEvaluator")
->
operator()(module_, name, static_cast<int>(dev.device_type), dev.device_id, number,
repeat, min_repeat_ms, max_repeat_num, cooldown_interval_ms,
repeat, min_repeat_ms, limit_zero_time_iterations, cooldown_interval_ms,
repeats_to_cooldown, "");

int num_flat_args = num_inputs + num_outputs;
@@ -391,17 +395,18 @@ PackedFunc GraphExecutorDebug::GetFunction(const std::string& name,
int number = args[0];
int repeat = args[1];
int min_repeat_ms = args[2];
int max_repeat_num = args[3];
int limit_zero_time_iterations = args[3];
int cooldown_interval_ms = args[4];
int repeats_to_cooldown = args[5];
ICHECK_GT(number, 0);
ICHECK_GT(repeat, 0);
ICHECK_GE(min_repeat_ms, 0);
ICHECK_GE(max_repeat_num, 0);
ICHECK_GE(limit_zero_time_iterations, 0);
ICHECK_GE(cooldown_interval_ms, 0);
ICHECK_GT(repeats_to_cooldown, 0);
std::string blob = this->RunIndividual(number, repeat, min_repeat_ms, max_repeat_num,
cooldown_interval_ms, repeats_to_cooldown);
std::string blob =
this->RunIndividual(number, repeat, min_repeat_ms, limit_zero_time_iterations,
cooldown_interval_ms, repeats_to_cooldown);
TVMByteArray arr;
arr.size = blob.length();
arr.data = blob.data();
@@ -413,20 +418,20 @@ PackedFunc GraphExecutorDebug::GetFunction(const std::string& name,
int number = args[1];
int repeat = args[2];
int min_repeat_ms = args[3];
int max_repeat_num = args[4];
int limit_zero_time_iterations = args[4];
int cooldown_interval_ms = args[5];
int repeats_to_cooldown = args[6];
ICHECK_GE(node_index, 0);
ICHECK_LT(node_index, nodes_.size());
ICHECK_GT(number, 0);
ICHECK_GT(repeat, 0);
ICHECK_GE(min_repeat_ms, 0);
ICHECK_GE(max_repeat_num, 0);
ICHECK_GE(limit_zero_time_iterations, 0);
ICHECK_GE(cooldown_interval_ms, 0);
ICHECK_GT(repeats_to_cooldown, 0);
std::string blob =
this->RunIndividualNode(node_index, number, repeat, min_repeat_ms, max_repeat_num,
cooldown_interval_ms, repeats_to_cooldown);
std::string blob = this->RunIndividualNode(node_index, number, repeat, min_repeat_ms,
limit_zero_time_iterations, cooldown_interval_ms,
repeats_to_cooldown);
TVMByteArray arr;
arr.size = blob.length();
arr.data = blob.data();
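The Python `run_individual_node` method is the user-facing entry point for the `RunIndividualNode` path above. A hedged sketch follows; it is not part of this commit, the setup repeats the earlier debug-executor example, and the node index is an arbitrary assumption chosen only to satisfy the bounds checks shown in the ICHECKs.

# Illustrative sketch, not part of this diff: timing one graph node by index.
import numpy as np
import tvm
from tvm import relay
from tvm.contrib.debugger import debug_executor
from tvm.relay import testing

mod, params = testing.mlp.get_workload(batch_size=1)
lib = relay.build(mod, target="llvm", params=params)
dev = tvm.cpu(0)
gmod = debug_executor.create(lib.get_graph_json(), lib.get_lib(), dev)
gmod.set_input("data", np.random.rand(1, 1, 28, 28).astype("float32"))
gmod.set_input(**lib.get_params())

# The node index must lie within [0, number of nodes), and repeats_to_cooldown
# must be positive, matching the checks above.
stats = gmod.run_individual_node(
    1,  # node index, passed positionally since the parameter name is not shown in this diff
    number=10,
    repeat=3,
    min_repeat_ms=0,
    limit_zero_time_iterations=100,
    cooldown_interval_ms=0,
    repeats_to_cooldown=1,
)
print(stats)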