Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OpenCLML] Refactor and introduce on chip memory and memory planner #14922

Merged
merged 5 commits into from
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,7 @@ include(cmake/modules/VTA.cmake)
include(cmake/modules/StandaloneCrt.cmake)
include(cmake/modules/CUDA.cmake)
include(cmake/modules/Hexagon.cmake) # This must come before logging.cmake
include(cmake/modules/contrib/CLML.cmake) # Must be before OpenCL.cmake
include(cmake/modules/OpenCL.cmake)
include(cmake/modules/OpenMP.cmake)
include(cmake/modules/Vulkan.cmake)
Expand Down Expand Up @@ -527,7 +528,6 @@ include(cmake/modules/contrib/ArmComputeLib.cmake)
include(cmake/modules/contrib/TensorRT.cmake)
include(cmake/modules/contrib/VitisAI.cmake)
include(cmake/modules/contrib/Verilator.cmake)
include(cmake/modules/contrib/CLML.cmake)
include(cmake/modules/contrib/UMA.cmake)
include(cmake/modules/Git.cmake)
include(cmake/modules/LibInfo.cmake)
Expand Down
36 changes: 20 additions & 16 deletions apps/cpp_clml/clml_runner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ CLMLRunner::CLMLRunner(std::string name, ToolArgs& args, cl_platform_id arg_plat
context(arg_context),
device_id(arg_device_id),
queue(arg_queue) {
LOG(INFO) << "CLMLRunner Constructor: Input:" << r_args.input << " Output:" << r_args.output
<< " Params:" << r_args.params;
LOG(INFO) << "CLMLRunner Constructor:" << name << " Input:" << r_args.input
<< " Output:" << r_args.output << " Params:" << r_args.params;
cl_int result;

// Query and Get CLML Interface
Expand Down Expand Up @@ -648,25 +648,29 @@ void CLMLRunner::MakeConcatenate(
void CLMLRunner::MakeDense(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> weight_desc,
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc,
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> bias_desc,
std::vector<cl_uint> in_shape, std::vector<cl_uint> wt_shape,
std::string dtype) {
cl_arithmetic_mode_qcom cl_arithmetic_mode = MakeCLArithMode(MakeCLDataType(dtype));
cl_ml_op_qcom op = nullptr;
cl_int result;
cl_gemm_transform_qcom b_transform = CL_GEMM_TRANSFORM_NONE_QCOM;

cl_ml_op_convolution_desc_qcom conv_desc = {CL_CONVOLUTION_MODE_CONVOLUTION_QCOM,
1,
4,
{0, 0},
{0, 0},
{1, 1},
{1, 1},
0,
cl_arithmetic_mode};

result = h_ClmlIntf->clCreateMLOpConvolutionForwardQCOM(
this->context, 0, &conv_desc, input_desc->tensor, weight_desc->tensor, bias_desc->tensor,
output_desc->tensor, &op, tuning_cache);
if (in_shape[1] == wt_shape[1]) {
b_transform = CL_GEMM_TRANSFORM_TRANSPOSE_QCOM;
}

cl_ml_op_gemm_desc_qcom gemmDesc = {in_shape[0], // m
wt_shape[0], // n
wt_shape[1], // k
CL_GEMM_TRANSFORM_NONE_QCOM, // A transform
b_transform, // B transform
{{1.0}, CL_FLOAT}, // alpha
{{0.0}, CL_FLOAT}, // beta
cl_arithmetic_mode};

result =
h_ClmlIntf->clCreateMLOpGemmQCOM(this->context, 0, &gemmDesc, input_desc->tensor,
weight_desc->tensor, output_desc->tensor, &op, tuning_cache);

CLML_SDK_TEST_AND_EXIT(op && result == CL_SUCCESS);
this->function.push_back(op);
Expand Down
2 changes: 1 addition & 1 deletion apps/cpp_clml/clml_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ class CLMLRunner {
void MakeDense(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> weight_desc,
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc,
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> bias_desc, std::string dtype);
std::vector<cl_uint> in_shape, std::vector<cl_uint> wt_shape, std::string dtype);

/*! \brief SoftMax layer implementation */
void MakeSoftMax(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
Expand Down
2 changes: 1 addition & 1 deletion apps/cpp_clml/scripts/clml_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def main():
clml_mod = clml.partition_for_clml(mod, params)
libm = relay.build(
clml_mod,
target="opencl -device=adreno",
target="opencl",
target_host="llvm -mtriple=aarch64-linux-gnu",
params=params,
)
Expand Down
37 changes: 25 additions & 12 deletions cmake/modules/OpenCL.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,33 @@ if(USE_OPENCL)
list(APPEND TVM_RUNTIME_LINKER_LIBS ${OpenCL_LIBRARIES})
endif()

if(DEFINED USE_OPENCL_GTEST AND EXISTS ${USE_OPENCL_GTEST})
include(FetchContent)
FetchContent_Declare(googletest SOURCE_DIR "${USE_OPENCL_GTEST}")
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
install(TARGETS gtest EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})
if(DEFINED USE_OPENCL_GTEST)
if(EXISTS ${USE_OPENCL_GTEST})
include(FetchContent)
FetchContent_Declare(googletest SOURCE_DIR "${USE_OPENCL_GTEST}")
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
install(TARGETS gtest EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})

message(STATUS "Found OpenCL gtest at ${USE_OPENCL_GTEST}")
message(STATUS "Found OpenCL gtest at ${USE_OPENCL_GTEST}")
set(Build_OpenCL_GTests ON)
elseif (ANDROID_ABI AND DEFINED ENV{ANDROID_NDK_HOME})
set(GOOGLETEST_ROOT $ENV{ANDROID_NDK_HOME}/sources/third_party/googletest)
add_library(gtest_main STATIC ${GOOGLETEST_ROOT}/src/gtest_main.cc ${GOOGLETEST_ROOT}/src/gtest-all.cc)
target_include_directories(gtest_main PRIVATE ${GOOGLETEST_ROOT})
target_include_directories(gtest_main PUBLIC ${GOOGLETEST_ROOT}/include)
message(STATUS "Using gtest from Android NDK")
set(Build_OpenCL_GTests ON)
endif()

tvm_file_glob(GLOB_RECURSE OPENCL_TEST_SRCS
"${CMAKE_SOURCE_DIR}/tests/cpp-runtime/opencl/*.cc"
)
add_executable(opencl-cpptest ${OPENCL_TEST_SRCS})
target_link_libraries(opencl-cpptest PRIVATE gtest_main tvm_runtime)
if(Build_OpenCL_GTests)
message(STATUS "Building OpenCL-Gtests")
tvm_file_glob(GLOB_RECURSE OPENCL_TEST_SRCS
"${CMAKE_SOURCE_DIR}/tests/cpp-runtime/opencl/*.cc"
)
add_executable(opencl-cpptest ${OPENCL_TEST_SRCS})
target_link_libraries(opencl-cpptest PRIVATE gtest_main tvm_runtime)
endif()
srkreddy1238 marked this conversation as resolved.
Show resolved Hide resolved
endif()
list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS})
if(USE_OPENCL_ENABLE_HOST_PTR)
Expand Down
Loading