Skip to content

Commit

Permalink
[OpenCLML] Refactor and introduce on chip memory and memory planner (a…
Browse files Browse the repository at this point in the history
…pache#14922)

* Refactor and introduce on chip memory and memory planner

Introduced thread context with CLMLWorkspace.
Organized the code as runtime, utils and memory planners
Introduced recording queue support and on chip memory support.
On chip memory allocation planner to accommodate multiple tensors at a time.
DDR memory planner introduced to reuse the underlying memory across
multiple tensor descriptors.

Dense layer support refactored to use GEMM.
CLML binary operators don't support broadcasting. Hence introduced an explicit
broadcast op as a workaround.

clml SDK codegen is enhanced accordingly.

* * review comments

* * Memory planner cpp_runtime tests.

* * gtest build rules while in android environments.

* * review comments

---------

Co-authored-by: Siva Rama Krishna Reddy B <sivb@blr-ubuntu-ripper.qualcomm.com>
  • Loading branch information
2 people authored and junrushao committed Jun 22, 2023
1 parent 883e3fb commit c6200ac
Show file tree
Hide file tree
Showing 16 changed files with 2,088 additions and 597 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,7 @@ include(cmake/modules/VTA.cmake)
include(cmake/modules/StandaloneCrt.cmake)
include(cmake/modules/CUDA.cmake)
include(cmake/modules/Hexagon.cmake) # This must come before logging.cmake
include(cmake/modules/contrib/CLML.cmake) # Must be before OpenCL.cmake
include(cmake/modules/OpenCL.cmake)
include(cmake/modules/OpenMP.cmake)
include(cmake/modules/Vulkan.cmake)
Expand Down Expand Up @@ -540,7 +541,6 @@ include(cmake/modules/contrib/ArmComputeLib.cmake)
include(cmake/modules/contrib/TensorRT.cmake)
include(cmake/modules/contrib/VitisAI.cmake)
include(cmake/modules/contrib/Verilator.cmake)
include(cmake/modules/contrib/CLML.cmake)
include(cmake/modules/contrib/UMA.cmake)
include(cmake/modules/Git.cmake)
include(cmake/modules/LibInfo.cmake)
Expand Down
36 changes: 20 additions & 16 deletions apps/cpp_clml/clml_runner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ CLMLRunner::CLMLRunner(std::string name, ToolArgs& args, cl_platform_id arg_plat
context(arg_context),
device_id(arg_device_id),
queue(arg_queue) {
LOG(INFO) << "CLMLRunner Constructor: Input:" << r_args.input << " Output:" << r_args.output
<< " Params:" << r_args.params;
LOG(INFO) << "CLMLRunner Constructor:" << name << " Input:" << r_args.input
<< " Output:" << r_args.output << " Params:" << r_args.params;
cl_int result;

// Query and Get CLML Interface
Expand Down Expand Up @@ -648,25 +648,29 @@ void CLMLRunner::MakeConcatenate(
void CLMLRunner::MakeDense(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> weight_desc,
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc,
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> bias_desc,
std::vector<cl_uint> in_shape, std::vector<cl_uint> wt_shape,
std::string dtype) {
cl_arithmetic_mode_qcom cl_arithmetic_mode = MakeCLArithMode(MakeCLDataType(dtype));
cl_ml_op_qcom op = nullptr;
cl_int result;
cl_gemm_transform_qcom b_transform = CL_GEMM_TRANSFORM_NONE_QCOM;

cl_ml_op_convolution_desc_qcom conv_desc = {CL_CONVOLUTION_MODE_CONVOLUTION_QCOM,
1,
4,
{0, 0},
{0, 0},
{1, 1},
{1, 1},
0,
cl_arithmetic_mode};

result = h_ClmlIntf->clCreateMLOpConvolutionForwardQCOM(
this->context, 0, &conv_desc, input_desc->tensor, weight_desc->tensor, bias_desc->tensor,
output_desc->tensor, &op, tuning_cache);
if (in_shape[1] == wt_shape[1]) {
b_transform = CL_GEMM_TRANSFORM_TRANSPOSE_QCOM;
}

cl_ml_op_gemm_desc_qcom gemmDesc = {in_shape[0], // m
wt_shape[0], // n
wt_shape[1], // k
CL_GEMM_TRANSFORM_NONE_QCOM, // A transform
b_transform, // B transform
{{1.0}, CL_FLOAT}, // alpha
{{0.0}, CL_FLOAT}, // beta
cl_arithmetic_mode};

result =
h_ClmlIntf->clCreateMLOpGemmQCOM(this->context, 0, &gemmDesc, input_desc->tensor,
weight_desc->tensor, output_desc->tensor, &op, tuning_cache);

CLML_SDK_TEST_AND_EXIT(op && result == CL_SUCCESS);
this->function.push_back(op);
Expand Down
2 changes: 1 addition & 1 deletion apps/cpp_clml/clml_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ class CLMLRunner {
void MakeDense(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> weight_desc,
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc,
std::shared_ptr<cl_ml_tensor_memory_desc_qcom> bias_desc, std::string dtype);
std::vector<cl_uint> in_shape, std::vector<cl_uint> wt_shape, std::string dtype);

/*! \brief SoftMax layer implementation */
void MakeSoftMax(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
Expand Down
2 changes: 1 addition & 1 deletion apps/cpp_clml/scripts/clml_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def main():
clml_mod = clml.partition_for_clml(mod, params)
libm = relay.build(
clml_mod,
target="opencl -device=adreno",
target="opencl",
target_host="llvm -mtriple=aarch64-linux-gnu",
params=params,
)
Expand Down
39 changes: 27 additions & 12 deletions cmake/modules/OpenCL.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,35 @@ if(USE_OPENCL)
list(APPEND TVM_RUNTIME_LINKER_LIBS ${OpenCL_LIBRARIES})
endif()

if(DEFINED USE_OPENCL_GTEST AND EXISTS ${USE_OPENCL_GTEST})
include(FetchContent)
FetchContent_Declare(googletest SOURCE_DIR "${USE_OPENCL_GTEST}")
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
install(TARGETS gtest EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})
if(DEFINED USE_OPENCL_GTEST)
if(EXISTS ${USE_OPENCL_GTEST})
include(FetchContent)
FetchContent_Declare(googletest SOURCE_DIR "${USE_OPENCL_GTEST}")
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
install(TARGETS gtest EXPORT ${PROJECT_NAME}Targets DESTINATION lib${LIB_SUFFIX})

message(STATUS "Found OpenCL gtest at ${USE_OPENCL_GTEST}")
message(STATUS "Found OpenCL gtest at ${USE_OPENCL_GTEST}")
set(Build_OpenCL_GTests ON)
elseif (ANDROID_ABI AND DEFINED ENV{ANDROID_NDK_HOME})
set(GOOGLETEST_ROOT $ENV{ANDROID_NDK_HOME}/sources/third_party/googletest)
add_library(gtest_main STATIC ${GOOGLETEST_ROOT}/src/gtest_main.cc ${GOOGLETEST_ROOT}/src/gtest-all.cc)
target_include_directories(gtest_main PRIVATE ${GOOGLETEST_ROOT})
target_include_directories(gtest_main PUBLIC ${GOOGLETEST_ROOT}/include)
message(STATUS "Using gtest from Android NDK")
set(Build_OpenCL_GTests ON)
endif()

tvm_file_glob(GLOB_RECURSE OPENCL_TEST_SRCS
"${CMAKE_SOURCE_DIR}/tests/cpp-runtime/opencl/*.cc"
)
add_executable(opencl-cpptest ${OPENCL_TEST_SRCS})
target_link_libraries(opencl-cpptest PRIVATE gtest_main tvm_runtime)
if(Build_OpenCL_GTests)
message(STATUS "Building OpenCL-Gtests")
tvm_file_glob(GLOB_RECURSE OPENCL_TEST_SRCS
"${CMAKE_SOURCE_DIR}/tests/cpp-runtime/opencl/*.cc"
)
add_executable(opencl-cpptest ${OPENCL_TEST_SRCS})
target_link_libraries(opencl-cpptest PRIVATE gtest_main tvm_runtime)
else()
message(STATUS "Couldn't build OpenCL-Gtests")
endif()
endif()
list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS})
if(USE_OPENCL_ENABLE_HOST_PTR)
Expand Down
Loading

0 comments on commit c6200ac

Please sign in to comment.