Commit

[OpenCLML] CLML Profiling fixes corresponding to OpenCL Timer recent changes.
srkreddy1238 committed Sep 6, 2022
1 parent 5dcf622 commit d034839
Showing 4 changed files with 45 additions and 30 deletions.
60 changes: 38 additions & 22 deletions src/runtime/contrib/clml/clml_runtime.cc
@@ -100,6 +100,29 @@ class CLMLRuntime : public JSONRuntimeBase {
*/
const char* type_key() const override { return "clml"; }

/*!
* \brief Get the command queue instance from the OpenCL workspace.
*
* \return The OpenCL command queue associated with the current device.
*/
cl_command_queue GetCommadQueue(void) {
// Reuse the OpenCl work space from TVM Device API.
auto func = tvm::runtime::Registry::Get("device_api.opencl");
ICHECK(func != nullptr) << "Cannot find OpenCL device_api in registry";
auto device_api = static_cast<cl::OpenCLWorkspace*>(((*func)()).operator void*());
this->context = device_api->context;
bool queue_found = false;
for (size_t i = 0; i < device_api->devices.size(); ++i) {
if (device_api->devices[i] == device_id) {
this->queue = device_api->queues[i];
this->evts = &(device_api->events[i]);
queue_found = true;
}
}
ICHECK(queue_found != false) << "Device queue not found in OpenCL Workspace";
return this->queue;
}

/*!
* \brief Initialize runtime. Create CLML layer from JSON
* representation.
@@ -146,22 +169,7 @@ class CLMLRuntime : public JSONRuntimeBase {
LOG(WARNING) << "CLML Runtime Init: Qualcomm extn not present.\n";
return;
}

// Reuse the OpenCl work space from TVM Device API.
auto func = tvm::runtime::Registry::Get("device_api.opencl");
ICHECK(func != nullptr) << "Cannot find OpenCL device_api in registry";
auto device_api = static_cast<cl::OpenCLWorkspace*>(((*func)()).operator void*());
this->context = device_api->context;
bool queue_found = false;
for (size_t i = 0; i < device_api->devices.size(); ++i) {
if (device_api->devices[i] == device_id) {
this->queue = device_api->queues[i];
this->evts = &(device_api->events[i]);
queue_found = true;
}
}
ICHECK(queue_found != false) << "Device queue not found in OpenCL Workspace";

this->queue = GetCommadQueue();
// Query and Get CLML Interface
static const cl_uint MAX_VERSIONS = 256;
cl_int majorVersions[MAX_VERSIONS];
Expand Down Expand Up @@ -220,7 +228,8 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_ml_tensor_layout_qcom layout = CL_TENSOR_LAYOUT_NCHW_QCOM) {
cl_int result = 0;
cl_event evt = NULL;
result = h_ClmlIntf->clEnqueueWriteMLTensorDataQCOM(queue, data, layout, tensor->tensor,
result = h_ClmlIntf->clEnqueueWriteMLTensorDataQCOM(GetCommadQueue(),
data, layout, tensor->tensor,
tensor->memory,
0, // n waitlist
NULL, // waitlist
@@ -233,7 +242,8 @@ class CLMLRuntime : public JSONRuntimeBase {
cl_int result = 0;
cl_event readEvent = NULL;
// Read the output tensor
result = h_ClmlIntf->clEnqueueReadMLTensorDataQCOM(queue, tensor->tensor, tensor->memory, data,
result = h_ClmlIntf->clEnqueueReadMLTensorDataQCOM(GetCommadQueue(),
tensor->tensor, tensor->memory, data,
layout,
0, // n waitlist
NULL, // waitlist
@@ -253,6 +263,7 @@ class CLMLRuntime : public JSONRuntimeBase {
*/
void Run() override {
cl_int result = 0;
this->queue = GetCommadQueue();
for (size_t i = 0; i < input_nodes_.size(); ++i) {
auto nid = input_nodes_[i];
uint32_t eid = EntryID(nid, 0);
Expand Down Expand Up @@ -286,10 +297,15 @@ class CLMLRuntime : public JSONRuntimeBase {
}

for (size_t i = 0; i < this->layer_.function.size(); ++i) {
this->evts->resize(this->evts->size() + 1);
cl_event* evt = &(this->evts->back());
result = h_ClmlIntf->clEnqueueMLOpQCOM(queue, this->layer_.function[i],
if (getenv("CLML_PROFILING")) {
this->evts->resize(this->evts->size() + 1);
cl_event* evt = &(this->evts->back());
result = h_ClmlIntf->clEnqueueMLOpQCOM(queue, this->layer_.function[i],
this->layer_.descriptorSet, 0, NULL, evt);
} else {
result = h_ClmlIntf->clEnqueueMLOpQCOM(queue, this->layer_.function[i],
this->layer_.descriptorSet, 0, NULL, NULL);
}
ICHECK(result == CL_SUCCESS) << "clEnqueueMLOpQCOM:" << result;
}

Expand Down Expand Up @@ -449,7 +465,7 @@ class CLMLRuntime : public JSONRuntimeBase {
LOG(WARNING) << "CLML Tunning In Progress:";
for (size_t i = 0; i < this->layer_.function.size(); ++i) {
LOG(WARNING) << "CLML Tunning:" << i;
result = h_ClmlIntf->clTuneMLOpQCOM(queue, this->layer_.function[i],
result = h_ClmlIntf->clTuneMLOpQCOM(GetCommadQueue(), this->layer_.function[i],
this->layer_.descriptorSet, this->tuning_cache, NULL);
ICHECK(result == CL_SUCCESS) << "clTuneMLOpQCOM:" << result;
}
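Note on the profiling change above: the enqueue path now creates an OpenCL event only when the CLML_PROFILING environment variable is set, so event bookkeeping is skipped on normal runs and profiling is enabled simply by exporting CLML_PROFILING before execution. Below is a minimal standalone sketch of that guard pattern, assuming the OpenCL headers are available; MaybeProfilingEvent and the evts vector are illustrative stand-ins, not part of the CLML runtime API.

#include <CL/cl.h>

#include <cstdlib>
#include <vector>

// Hypothetical helper mirroring the CLML_PROFILING check in Run():
// grows the event list and returns a slot for recording a profiling
// event only when profiling is requested; otherwise returns nullptr so
// the enqueue call is issued without an event.
static cl_event* MaybeProfilingEvent(std::vector<cl_event>* evts) {
  if (std::getenv("CLML_PROFILING") == nullptr) {
    return nullptr;
  }
  evts->resize(evts->size() + 1);
  return &evts->back();
}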
6 changes: 3 additions & 3 deletions tests/python/contrib/test_clml/infrastructure.py
@@ -73,11 +73,11 @@ class Device:

connection_type = "tracker"
host = "localhost"
port = 9090
port = 9150
target = "opencl"
target_host = "llvm -mtriple=aarch64-linux-gnu"
device_key = ""
cross_compile = ""
device_key = "android"
cross_compile = "aarch64-linux-android-g++"

def __init__(self):
"""Keep remote device for lifetime of object."""
5 changes: 2 additions & 3 deletions tests/python/contrib/test_clml/test_network.py
@@ -22,8 +22,7 @@
from tvm import relay

import tvm
from test_clml.infrastructure import skip_runtime_test, build_and_run
from test_clml.infrastructure import Device
from infrastructure import skip_runtime_test, build_and_run, Device


def _build_and_run_network(mod, params, inputs, data, device, atol, rtol):
@@ -86,7 +85,7 @@ def get_model():
mobilenet = MobileNet(
include_top=True, weights=None, input_shape=(224, 224, 3), classes=1000
)
mobilenet.load_weights("mobilenet_1_0_224_tf.h5")
#mobilenet.load_weights("mobilenet_1_0_224_tf.h5")
inputs = {mobilenet.input_names[0]: ((1, 3, 224, 224), "float32")}

data = {}
4 changes: 2 additions & 2 deletions tests/python/contrib/test_clml/test_ops.py
@@ -25,7 +25,7 @@
from tvm import relay
from tvm.ir import IRModule

from test_clml.infrastructure import (
from infrastructure import (
skip_runtime_test,
skip_codegen_test,
build_and_run,
@@ -212,5 +212,5 @@ def test_batchnorm():


if __name__ == "__main__":
# test_conv2d()
test_conv2d()
test_batchnorm()
