Commit

[OpenCLML] CLML Profiling fixes corresponding to OpenCL Timer recent changes.
srkreddy1238 committed Sep 6, 2022
1 parent 5dcf622 commit d034839
Showing 4 changed files with 45 additions and 30 deletions.
60 changes: 38 additions & 22 deletions src/runtime/contrib/clml/clml_runtime.cc
@@ -100,6 +100,29 @@ class CLMLRuntime : public JSONRuntimeBase {
*/
const char* type_key() const override { return "clml"; }

/*!
* \brief Get the command queue instance from the OpenCL workspace.
*
* \return The OpenCL command queue associated with the current device.
*/
cl_command_queue GetCommadQueue(void) {
// Reuse the OpenCl work space from TVM Device API.
auto func = tvm::runtime::Registry::Get("device_api.opencl");
ICHECK(func != nullptr) << "Cannot find OpenCL device_api in registry";
auto device_api = static_cast<cl::OpenCLWorkspace*>(((*func)()).operator void*());
this->context = device_api->context;
bool queue_found = false;
for (size_t i = 0; i < device_api->devices.size(); ++i) {
if (device_api->devices[i] == device_id) {
this->queue = device_api->queues[i];
this->evts = &(device_api->events[i]);
queue_found = true;
}
}
ICHECK(queue_found != false) << "Device queue not found in OpenCL Workspace";
return this->queue;
}

/*!
* \brief Initialize runtime. Create CLML layer from JSON
* representation.
@@ -146,22 +169,7 @@ class CLMLRuntime : public JSONRuntimeBase {
LOG(WARNING) << "CLML Runtime Init: Qualcomm extn not present.\n";
return;
}

// Reuse the OpenCl work space from TVM Device API.
auto func = tvm::runtime::Registry::Get("device_api.opencl");
ICHECK(func != nullptr) << "Cannot find OpenCL device_api in registry";
auto device_api = static_cast<cl::OpenCLWorkspace*>(((*func)()).operator void*());
this->context = device_api->context;
bool queue_found = false;
for (size_t i = 0; i < device_api->devices.size(); ++i) {
if (device_api->devices[i] == device_id) {
this->queue = device_api->queues[i];
this->evts = &(device_api->events[i]);
queue_found = true;
}
}
ICHECK(queue_found != false) << "Device queue not found in OpenCL Workspace";

this->queue = GetCommadQueue();
// Query and Get CLML Interface
static const cl_uint MAX_VERSIONS = 256;
cl_int majorVersions[MAX_VERSIONS];
Expand Down Expand Up @@ -220,7 +228,8 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_ml_tensor_layout_qcom layout = CL_TENSOR_LAYOUT_NCHW_QCOM) {
cl_int result = 0;
cl_event evt = NULL;
result = h_ClmlIntf->clEnqueueWriteMLTensorDataQCOM(queue, data, layout, tensor->tensor,
result = h_ClmlIntf->clEnqueueWriteMLTensorDataQCOM(GetCommadQueue(),
data, layout, tensor->tensor,
tensor->memory,
0, // n waitlist
NULL, // waitlist
@@ -233,7 +242,8 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_int result = 0;
cl_event readEvent = NULL;
// Read the output tensor
result = h_ClmlIntf->clEnqueueReadMLTensorDataQCOM(queue, tensor->tensor, tensor->memory, data,
result = h_ClmlIntf->clEnqueueReadMLTensorDataQCOM(GetCommadQueue(),
tensor->tensor, tensor->memory, data,
layout,
0, // n waitlist
NULL, // waitlist
@@ -253,6 +263,7 @@ class CLMLRuntime : public JSONRuntimeBase {
*/
void Run() override {
cl_int result = 0;
this->queue = GetCommadQueue();
for (size_t i = 0; i < input_nodes_.size(); ++i) {
auto nid = input_nodes_[i];
uint32_t eid = EntryID(nid, 0);
Expand Down Expand Up @@ -286,10 +297,15 @@ class CLMLRuntime : public JSONRuntimeBase {
}

for (size_t i = 0; i < this->layer_.function.size(); ++i) {
this->evts->resize(this->evts->size() + 1);
cl_event* evt = &(this->evts->back());
result = h_ClmlIntf->clEnqueueMLOpQCOM(queue, this->layer_.function[i],
if (getenv("CLML_PROFILING")) {
this->evts->resize(this->evts->size() + 1);
cl_event* evt = &(this->evts->back());
result = h_ClmlIntf->clEnqueueMLOpQCOM(queue, this->layer_.function[i],
this->layer_.descriptorSet, 0, NULL, evt);
} else {
result = h_ClmlIntf->clEnqueueMLOpQCOM(queue, this->layer_.function[i],
this->layer_.descriptorSet, 0, NULL, NULL);
}
ICHECK(result == CL_SUCCESS) << "clEnqueueMLOpQCOM:" << result;
}

Expand Down Expand Up @@ -449,7 +465,7 @@ class CLMLRuntime : public JSONRuntimeBase {
LOG(WARNING) << "CLML Tunning In Progress:";
for (size_t i = 0; i < this->layer_.function.size(); ++i) {
LOG(WARNING) << "CLML Tunning:" << i;
result = h_ClmlIntf->clTuneMLOpQCOM(queue, this->layer_.function[i],
result = h_ClmlIntf->clTuneMLOpQCOM(GetCommadQueue(), this->layer_.function[i],
this->layer_.descriptorSet, this->tuning_cache, NULL);
ICHECK(result == CL_SUCCESS) << "clTuneMLOpQCOM:" << result;
}
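Note on the profiling change above: the enqueue path now creates an OpenCL event only when the CLML_PROFILING environment variable is set, so event bookkeeping is skipped on normal runs and profiling is enabled simply by exporting CLML_PROFILING before execution. Below is a minimal standalone sketch of that guard pattern, assuming the OpenCL headers are available; MaybeProfilingEvent and the evts vector are illustrative stand-ins, not part of the CLML runtime API.

#include <CL/cl.h>

#include <cstdlib>
#include <vector>

// Hypothetical helper mirroring the CLML_PROFILING check in Run():
// grows the event list and returns a slot for recording a profiling
// event only when profiling is requested; otherwise returns nullptr so
// the enqueue call is issued without an event.
static cl_event* MaybeProfilingEvent(std::vector<cl_event>* evts) {
  if (std::getenv("CLML_PROFILING") == nullptr) {
    return nullptr;
  }
  evts->resize(evts->size() + 1);
  return &evts->back();
}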
6 changes: 3 additions & 3 deletions tests/python/contrib/test_clml/infrastructure.py
@@ -73,11 +73,11 @@ class Device:

connection_type = "tracker"
host = "localhost"
port = 9090
port = 9150
target = "opencl"
target_host = "llvm -mtriple=aarch64-linux-gnu"
device_key = ""
cross_compile = ""
device_key = "android"
cross_compile = "aarch64-linux-android-g++"

def __init__(self):
"""Keep remote device for lifetime of object."""
5 changes: 2 additions & 3 deletions tests/python/contrib/test_clml/test_network.py
@@ -22,8 +22,7 @@
from tvm import relay

import tvm
from test_clml.infrastructure import skip_runtime_test, build_and_run
from test_clml.infrastructure import Device
from infrastructure import skip_runtime_test, build_and_run, Device


def _build_and_run_network(mod, params, inputs, data, device, atol, rtol):
@@ -86,7 +85,7 @@ def get_model():
mobilenet = MobileNet(
include_top=True, weights=None, input_shape=(224, 224, 3), classes=1000
)
mobilenet.load_weights("mobilenet_1_0_224_tf.h5")
#mobilenet.load_weights("mobilenet_1_0_224_tf.h5")
inputs = {mobilenet.input_names[0]: ((1, 3, 224, 224), "float32")}

data = {}
4 changes: 2 additions & 2 deletions tests/python/contrib/test_clml/test_ops.py
@@ -25,7 +25,7 @@
from tvm import relay
from tvm.ir import IRModule

from test_clml.infrastructure import (
from infrastructure import (
skip_runtime_test,
skip_codegen_test,
build_and_run,
@@ -212,5 +212,5 @@ def test_batchnorm():


if __name__ == "__main__":
# test_conv2d()
test_conv2d()
test_batchnorm()
