From bcf6a9fc2ca0ee8f7ed8812ceb095cb5699acb17 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 30 Mar 2020 14:14:38 -0500 Subject: [PATCH 01/12] [RUNTIME] Initial implementation of Hexagon runtime support This is only the TVM runtime. The FastRPC libraries, simulator driver, etc. will be provided in subsequent commits. --- CMakeLists.txt | 53 +- cmake/config.cmake | 3 + cmake/modules/Hexagon.cmake | 96 ++ include/tvm/runtime/c_runtime_api.h | 1 + include/tvm/runtime/device_api.h | 5 +- include/tvm/target/target.h | 4 + .../main/java/org/apache/tvm/TVMContext.java | 15 + python/tvm/__init__.py | 2 +- python/tvm/_ffi/runtime_ctypes.py | 2 + python/tvm/runtime/ndarray.py | 16 + python/tvm/target/__init__.py | 2 +- python/tvm/target/target.py | 90 ++ src/runtime/hexagon/README.md | 76 + src/runtime/hexagon/hexagon_device_api.cc | 149 ++ src/runtime/hexagon/hexagon_module.cc | 561 +++++++ src/runtime/hexagon/hexagon_module.h | 160 ++ src/runtime/hexagon/hexagon_posix.cc | 39 + src/runtime/hexagon/sim/hexagon_device_sim.cc | 1419 +++++++++++++++++ src/runtime/hexagon/sim/hexagon_sim_proto.h | 73 + .../target/fastrpc/tvm_hexagon_remote.h | 129 ++ .../target/fastrpc/tvm_hexagon_remote_nd.h | 88 + .../hexagon/target/hexagon_device_target.cc | 525 ++++++ .../hexagon/target/hexagon_dsprpcapi.cc | 100 ++ .../hexagon/target/hexagon_dsprpcapi.h | 192 +++ src/runtime/hexagon/target/hexagon_stubapi.cc | 109 ++ src/runtime/hexagon/target/hexagon_stubapi.h | 318 ++++ .../hexagon/target/hexagon_target_log.h | 40 + src/target/opt/build_hexagon_off.cc | 35 + src/target/target.cc | 11 +- 29 files changed, 4304 insertions(+), 9 deletions(-) create mode 100644 cmake/modules/Hexagon.cmake create mode 100644 src/runtime/hexagon/README.md create mode 100644 src/runtime/hexagon/hexagon_device_api.cc create mode 100644 src/runtime/hexagon/hexagon_module.cc create mode 100644 src/runtime/hexagon/hexagon_module.h create mode 100644 src/runtime/hexagon/hexagon_posix.cc create mode 
100644 src/runtime/hexagon/sim/hexagon_device_sim.cc create mode 100644 src/runtime/hexagon/sim/hexagon_sim_proto.h create mode 100644 src/runtime/hexagon/target/fastrpc/tvm_hexagon_remote.h create mode 100644 src/runtime/hexagon/target/fastrpc/tvm_hexagon_remote_nd.h create mode 100644 src/runtime/hexagon/target/hexagon_device_target.cc create mode 100644 src/runtime/hexagon/target/hexagon_dsprpcapi.cc create mode 100644 src/runtime/hexagon/target/hexagon_dsprpcapi.h create mode 100644 src/runtime/hexagon/target/hexagon_stubapi.cc create mode 100644 src/runtime/hexagon/target/hexagon_stubapi.h create mode 100644 src/runtime/hexagon/target/hexagon_target_log.h create mode 100644 src/target/opt/build_hexagon_off.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 6993f6727871..cf334ff02baf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,8 @@ tvm_option(USE_OPENGL "Build with OpenGL" OFF) tvm_option(USE_METAL "Build with Metal" OFF) tvm_option(USE_ROCM "Build with ROCM" OFF) tvm_option(ROCM_PATH "The path to rocm" /opt/rocm) +tvm_option(USE_HEXAGON_DEVICE "Build with Hexagon device support in TVM runtime" OFF) +tvm_option(USE_HEXAGON_SDK "Path to the Hexagon SDK root (required for Hexagon support in TVM runtime or for building TVM runtime for Hexagon)" /path/to/sdk) tvm_option(USE_RPC "Build with RPC" ON) tvm_option(USE_THREADS "Build with thread support" ON) tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" OFF) @@ -78,7 +80,7 @@ include_directories(${PICOJSON_PATH}) # initial variables set(TVM_LINKER_LIBS "") -set(TVM_RUNTIME_LINKER_LIBS ${CMAKE_DL_LIBS}) +set(TVM_RUNTIME_LINKER_LIBS "") # Generic compilation options if(MSVC) @@ -118,8 +120,33 @@ else(MSVC) CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) set(CMAKE_CXX_FLAGS "-faligned-new ${CMAKE_CXX_FLAGS}") endif() + + # Detect if we're compiling for Hexagon. 
+ set(TEST_FOR_HEXAGON_CXX + "#ifndef __hexagon__" + "#error" + "#endif" + "int main() {}" + # Define _start_main to avoid linking errors with -fPIC. + "extern \"C\" void _start_main() {}") + set(TEST_FOR_HEXAGON_DIR + "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp") + set(TEST_FOR_HEXAGON_FILE "${TEST_FOR_HEXAGON_DIR}/test_for_hexagon.cc") + string(REPLACE ";" "\n" TEST_FOR_HEXAGON_CXX_TEXT "${TEST_FOR_HEXAGON_CXX}") + file(WRITE "${TEST_FOR_HEXAGON_FILE}" "${TEST_FOR_HEXAGON_CXX_TEXT}") + try_compile(BUILD_FOR_HEXAGON "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}" + "${TEST_FOR_HEXAGON_FILE}") + file(REMOVE "${TEST_FOR_HEXAGON_FILE}") + if(BUILD_FOR_HEXAGON) + message(STATUS "Building for Hexagon") + endif() endif(MSVC) +# Hexagon has dlopen built into QuRT (no need for static library). +if(NOT BUILD_FOR_HEXAGON) + string(APPEND TVM_RUNTIME_LINKER_LIBS ${CMAKE_DL_LIBS}) +endif() + # add source group FILE(GLOB_RECURSE GROUP_SOURCE "src/*.cc") FILE(GLOB_RECURSE GROUP_INCLUDE "src/*.h" "include/*.h") @@ -177,6 +204,13 @@ if(USE_VM_PROFILER) list(APPEND COMPILER_SRCS ${BACKEND_VM_PROFILER_SRCS}) endif(USE_VM_PROFILER) +if(BUILD_FOR_HEXAGON) + # Add file implementing posix_memalign. + list(APPEND RUNTIME_SRCS src/runtime/hexagon/hexagon_posix.cc) + + add_definitions(-D_MACH_I32=int) +endif() + file(GLOB DATATYPE_SRCS src/target/datatype/*.cc) list(APPEND COMPILER_SRCS ${DATATYPE_SRCS}) @@ -242,6 +276,7 @@ endif(USE_EXAMPLE_EXT_RUNTIME) # Module rules include(cmake/modules/VTA.cmake) include(cmake/modules/CUDA.cmake) +include(cmake/modules/Hexagon.cmake) include(cmake/modules/OpenCL.cmake) include(cmake/modules/OpenGL.cmake) include(cmake/modules/OpenMP.cmake) @@ -283,7 +318,19 @@ else() set_target_properties(tvm PROPERTIES COMPILE_DEFINITIONS "NDEBUG") endif(USE_RELAY_DEBUG) -if(USE_THREADS) +if(BUILD_FOR_HEXAGON) + # Wrap pthread_create to allow setting custom stack size. 
+ set_target_properties(tvm_runtime PROPERTIES LINK_FLAGS + "-Wl,--wrap=pthread_create") + + target_include_directories(tvm_runtime + PUBLIC "${USE_HEXAGON_SDK}/libs/common/qurt/ADSPv62MP/include/posix" + PUBLIC "${USE_HEXAGON_SDK}/libs/common/qurt/ADSPv62MP/include/qurt" + PUBLIC "${USE_HEXAGON_SDK}/incs" + PUBLIC "${USE_HEXAGON_SDK}/incs/stddef") +endif() + +if(USE_THREADS AND NOT BUILD_FOR_HEXAGON) message(STATUS "Build with thread support...") set(CMAKE_THREAD_PREFER_PTHREAD TRUE) set(THREADS_PREFER_PTHREAD_FLAG TRUE) @@ -291,7 +338,7 @@ if(USE_THREADS) target_link_libraries(tvm Threads::Threads) target_link_libraries(tvm_topi Threads::Threads) target_link_libraries(tvm_runtime Threads::Threads) -endif(USE_THREADS) +endif() target_link_libraries(tvm ${TVM_LINKER_LIBS} ${TVM_RUNTIME_LINKER_LIBS}) target_link_libraries(tvm_topi tvm ${TVM_LINKER_LIBS} ${TVM_RUNTIME_LINKER_LIBS}) diff --git a/cmake/config.cmake b/cmake/config.cmake index 8c448b427941..e5681af110a0 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -208,3 +208,6 @@ set(USE_THRUST OFF) # Whether to build the TensorFlow TVMDSOOp module set(USE_TF_TVMDSOOP OFF) +# Whether to use hexagon device +set(USE_HEXAGON_DEVICE OFF) +set(USE_HEXAGON_SDK /path/to/sdk) diff --git a/cmake/modules/Hexagon.cmake b/cmake/modules/Hexagon.cmake new file mode 100644 index 000000000000..5b56982a42c5 --- /dev/null +++ b/cmake/modules/Hexagon.cmake @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(PICK_SIM "sim") +set(PICK_HW "target") +set(PICK_NONE "OFF") + +function(find_hexagon_toolchain) + if (NOT "${USE_HEXAGON_TOOLCHAIN}" STREQUAL "") + set(TRY_PATH "${USE_HEXAGON_TOOLCHAIN}") + else() + set(TRY_PATH "${USE_HEXAGON_SDK}") + endif() + message(STATUS "Looking for Hexagon toolchain in ${TRY_PATH}") + file(GLOB_RECURSE HEXAGON_CLANG "${TRY_PATH}/*/hexagon-clang++") + if(HEXAGON_CLANG) + # The path is ${HEXAGON_TOOLCHAIN}/bin/hexagon-clang++. + get_filename_component(HEXAGON_TMP0 "${HEXAGON_CLANG}" DIRECTORY) + get_filename_component(HEXAGON_TMP1 "${HEXAGON_TMP0}" DIRECTORY) + set(HEXAGON_TOOLCHAIN "${HEXAGON_TMP1}" CACHE PATH + "Path to the Hexagon toolchain") + else(HEXAGON_CLANG) + message(SEND_ERROR "Cannot find Hexagon toolchain in ${TRY_PATH}") + endif() +endfunction() + +function(find_hexagon_sdk_root) + message(STATUS "Checking Hexagon SDK root: ${USE_HEXAGON_SDK}") + file(GLOB_RECURSE HEXAGON_AEESTDDEF "${USE_HEXAGON_SDK}/*/AEEStdDef.h") + if(HEXAGON_AEESTDDEF) + # The path is ${HEXAGON_SDK_ROOT}/incs/stddef/AEEStdDef.h. 
+ get_filename_component(HEXAGON_TMP0 "${HEXAGON_AEESTDDEF}" DIRECTORY) + get_filename_component(HEXAGON_TMP1 "${HEXAGON_TMP0}" DIRECTORY) + get_filename_component(HEXAGON_TMP2 "${HEXAGON_TMP1}" DIRECTORY) + set(HEXAGON_SDK_ROOT "${HEXAGON_TMP2}" CACHE PATH + "Root directory of Hexagon SDK") + else(HEXAGON_AEESTDDEF) + message(SEND_ERROR "Cannot validate Hexagon SDK in ${USE_HEXAGON_SDK}") + endif() +endfunction() + +if(USE_HEXAGON_DEVICE STREQUAL "OFF") + list(APPEND COMPILER_SRCS src/target/opt/build_hexagon_off.cc) + return() +elseif(NOT USE_HEXAGON_DEVICE STREQUAL "${PICK_SIM}" AND + NOT USE_HEXAGON_DEVICE STREQUAL "${PICK_HW}") + set(ERROR_MSG + "USE_HEXAGON_DEVICE must be one of [${PICK_NONE}|${PICK_SIM}|${PICK_HW}]") + message(SEND_ERROR "${ERROR_MSG}") + return() +endif() +# If USE_HEXAGON_DEVICE is set to a valid value, make sure that USE_HEXAGON_SDK +# is defined. +if (NOT USE_HEXAGON_SDK) + message(SEND_ERROR "Please set USE_HEXAGON_SDK to the Hexagon SDK root") + return() +endif() + +if(USE_HEXAGON_DEVICE STREQUAL "${PICK_SIM}") + find_hexagon_toolchain() + message(STATUS "Hexagon toolchain: ${HEXAGON_TOOLCHAIN}") + file(GLOB RUNTIME_HEXAGON_SIM_SRCS src/runtime/hexagon/sim/*.cc) + include_directories("${HEXAGON_TOOLCHAIN}/include/iss") + link_directories("${HEXAGON_TOOLCHAIN}/lib/iss") + list(APPEND TVM_RUNTIME_LINKER_LIBS "-lwrapper") +elseif(USE_HEXAGON_DEVICE STREQUAL "${PICK_HW}") + find_hexagon_sdk_root() + find_hexagon_toolchain() + message(STATUS "Hexagon SDK: ${HEXAGON_SDK_ROOT}") + file(GLOB RUNTIME_HEXAGON_DEVICE_SRCS src/runtime/hexagon/target/*.cc) + include_directories("${HEXAGON_SDK_ROOT}/incs/stddef") + include_directories("${HEXAGON_SDK_ROOT}/libs/common/rpcmem/inc") + include_directories( + "${HEXAGON_SDK_ROOT}/libs/common/remote/ship/android_Release_aarch64") + include_directories("${HEXAGON_TOOLCHAIN}/include/iss") + list(APPEND TVM_RUNTIME_LINKER_LIBS "-ldl") +endif() + +file(GLOB RUNTIME_HEXAGON_SRCS src/runtime/hexagon/*.cc) 
+list(APPEND RUNTIME_SRCS ${RUNTIME_HEXAGON_SRCS} ${RUNTIME_HEXAGON_SIM_SRCS} + ${RUNTIME_HEXAGON_DEVICE_SRCS}) + diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h index bcb75eab731d..28c390a025b3 100644 --- a/include/tvm/runtime/c_runtime_api.h +++ b/include/tvm/runtime/c_runtime_api.h @@ -82,6 +82,7 @@ typedef enum { kDLSDAccel = 6, kOpenGL = 11, kDLMicroDev = 13, + kDLHexagon = 14, // AddExtraTVMType which is not in DLPack here } TVMDeviceExtType; diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h index d7372f7254f2..470a1fefd856 100644 --- a/include/tvm/runtime/device_api.h +++ b/include/tvm/runtime/device_api.h @@ -47,10 +47,10 @@ enum DeviceAttrKind : int { }; /*! \brief Number of bytes each allocation must align to */ -constexpr int kAllocAlignment = 64; +constexpr int kAllocAlignment = 128; /*! \brief Number of bytes each allocation must align to in temporary allocation */ -constexpr int kTempAllocaAlignment = 64; +constexpr int kTempAllocaAlignment = 128; /*! \brief Maximum size that can be allocated on stack */ constexpr int kMaxStackAlloca = 1024; @@ -218,6 +218,7 @@ inline const char* DeviceName(int type) { case kOpenGL: return "opengl"; case kDLExtDev: return "ext_dev"; case kDLMicroDev: return "micro_dev"; + case kDLHexagon: return "hexagon"; default: LOG(FATAL) << "unknown type =" << type; return "Unknown"; } } diff --git a/include/tvm/target/target.h b/include/tvm/target/target.h index 16292094c889..f6fd3c4413b0 100644 --- a/include/tvm/target/target.h +++ b/include/tvm/target/target.h @@ -177,6 +177,10 @@ TVM_DLL Target stackvm(const std::vector& options = /*! \return A target for external device */ TVM_DLL Target ext_dev(const std::vector& options = std::vector()); + +/*! \return A target for hexagon */ +TVM_DLL Target hexagon(const std::vector& options = + std::vector()); } // namespace target /*! 
diff --git a/jvm/core/src/main/java/org/apache/tvm/TVMContext.java b/jvm/core/src/main/java/org/apache/tvm/TVMContext.java index 9bc59651d931..76375636a6ca 100644 --- a/jvm/core/src/main/java/org/apache/tvm/TVMContext.java +++ b/jvm/core/src/main/java/org/apache/tvm/TVMContext.java @@ -33,6 +33,7 @@ public class TVMContext { MASK2STR.put(7, "vulkan"); MASK2STR.put(8, "metal"); MASK2STR.put(9, "vpi"); + MASK2STR.put(14, "hexagon"); STR2MASK.put("cpu", 1); STR2MASK.put("gpu", 2); @@ -42,6 +43,7 @@ public class TVMContext { STR2MASK.put("vulkan", 7); STR2MASK.put("metal", 8); STR2MASK.put("vpi", 9); + STR2MASK.put("hexagon", 14); } /** @@ -122,6 +124,19 @@ public static TVMContext vpi() { return vpi(0); } + /** + * Construct a Hexagon device. + * @param devId The device id + * @return The created context + */ + public static TVMContext hexagon(int devId) { + return new TVMContext(14, devId); + } + + public static TVMContext hexagon() { + return hexagon(0); + } + public final int deviceType; public final int deviceId; diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py index 916708a67b46..f781aef0a8be 100644 --- a/python/tvm/__init__.py +++ b/python/tvm/__init__.py @@ -30,7 +30,7 @@ # tvm.runtime from .runtime.object import Object from .runtime.ndarray import context, cpu, gpu, opencl, cl, vulkan, metal, mtl -from .runtime.ndarray import vpi, rocm, opengl, ext_dev, micro_dev +from .runtime.ndarray import vpi, rocm, opengl, ext_dev, micro_dev, hexagon from .runtime import ndarray as nd # tvm.error diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py index 10f0fec82a80..160cc3ec9e21 100644 --- a/python/tvm/_ffi/runtime_ctypes.py +++ b/python/tvm/_ffi/runtime_ctypes.py @@ -145,6 +145,7 @@ class TVMContext(ctypes.Structure): 11: 'opengl', 12: 'ext_dev', 13: 'micro_dev', + 14: 'hexagon', } STR2MASK = { 'llvm': 1, @@ -166,6 +167,7 @@ class TVMContext(ctypes.Structure): 'opengl': 11, 'ext_dev': 12, 'micro_dev': 13, + 'hexagon': 14, } def 
__init__(self, device_type, device_id): super(TVMContext, self).__init__() diff --git a/python/tvm/runtime/ndarray.py b/python/tvm/runtime/ndarray.py index 213ef675bd8f..ee7ab7b5d11f 100644 --- a/python/tvm/runtime/ndarray.py +++ b/python/tvm/runtime/ndarray.py @@ -462,6 +462,22 @@ def micro_dev(dev_id=0): return TVMContext(13, dev_id) +def hexagon(dev_id=0): + """Construct a Hexagon device + + Parameters + ---------- + dev_id : int, optional + The integer device id + + Returns + ------- + ctx : TVMContext + The created context + """ + return TVMContext(14, dev_id) + + cl = opencl mtl = metal diff --git a/python/tvm/target/__init__.py b/python/tvm/target/__init__.py index 3460be47aa95..6b86ff0d0c66 100644 --- a/python/tvm/target/__init__.py +++ b/python/tvm/target/__init__.py @@ -55,7 +55,7 @@ We can also use other specific function in this module to create specific targets. """ from .target import Target, create -from .target import cuda, rocm, mali, intel_graphics, opengl, arm_cpu, rasp, vta, bifrost +from .target import cuda, rocm, mali, intel_graphics, opengl, arm_cpu, rasp, vta, bifrost, hexagon from .generic_func import GenericFunc from .generic_func import generic_func, get_native_generic_func, override_native_generic_func from . import datatype diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py index e6046cef1839..b56a4520cfd2 100644 --- a/python/tvm/target/target.py +++ b/python/tvm/target/target.py @@ -247,6 +247,96 @@ def bifrost(model='unknown', options=None): return _ffi_api.TargetCreate("opencl", *opts) +def hexagon(cpu_ver='v66', sim_args=None, hvx=128): + """Returns a Hexagon target. + + Parameters + ---------- + cpu_ver : str + CPU version used for code generation. Not all allowed cpu str + will be valid, LLVM will throw an error. + sim_args : str or list of str + User defined sim arguments. CPU version defaults to cpu_ver. + Otherwise, separate versions are used for codegen and sim. 
Not + all allowed cpu strings will be valid, simulator will throw an + error if invalid. Does not affect codegen. + hvx : int + Size of hvx register. Value of 0 indicates disabled hvx. + """ + # Example compiler arguments + # llvm -target=hexagon -mcpu=hexagonv66 -mattr=+hvxv66,+hvx-length128b + + # Check for valid codegen cpu + valid_hex = ['v60', 'v62', 'v65', 'v66', 'v67', 'v67t'] + try: + cpu_ver = cpu_ver[cpu_ver.index('v'):].lower() + assert(3 <= len(cpu_ver) <= 4) + except: + msg = '{} is not a valid Hexagon version\nvalid versions include {}' + raise ValueError(msg.format(cpu_ver, valid_hex)) from None + + assert hvx in [0, 64, 128] + + # Target string + def create_target(cpu_ver): + target = ' -target=hexagon' + mcpu = ' -mcpu=hexagon'+cpu_ver + mattr = '' + # HVX enable + if hvx: + mattr = ' -mattr=+hvx' + cpu_ver + ',+hvx-length' + str(hvx) + 'b' + return 'llvm' + target + mcpu + mattr + + # Simulator string + def create_sim(cpu_ver, sim_args): + if not sim_args: + return cpu_ver + + sim_cpu = cpu_ver + ' ' + + # Add user defined args + if isinstance(sim_args, list): + sim_args = ' '.join(sim_args) + + # Check for supplied sim cpu version + if 'v6' in sim_args: + sim_cpu = '' + + # Regex match for allowed cpus + valid_cpu_str_regex = r'(?P
--.*\s)?(--m)?' +                 \
+                r'(?Pv6[25678])(?P[a-z])?' +       \
+                r'(?P_[0-9]+)?(?P_rev[0-9])?\s?(?P--.*)?'
+            m = re.match(valid_cpu_str_regex, sim_args.lower())
+            if not m:
+                raise ValueError(
+                    'Invalid simulator argument string "{}"'.format(sim_args))
+
+            # Parse options into correct order
+            cpu_attr = {x: str(m.groupdict()[x] or '') for x in m.groupdict()}
+            sim_args = cpu_attr['base_version'] +  \
+                       cpu_attr['sub_version']  +  \
+                       cpu_attr['l2_size'] +       \
+                       cpu_attr['rev'] + ' ' +     \
+                       cpu_attr['pre'] + cpu_attr['post']
+
+        # Check HVX length of sim vs codegen
+        if "--hvx_length" in sim_args:
+            i = sim_args.index('hvx_length') + len('hvx_length') + 1
+            sim_hvx = sim_args[i:i+3]
+            if sim_hvx != str(hvx):
+                print('WARNING: sim hvx {} and codegen hvx {} do not match!' \
+                      .format(sim_hvx, hvx))
+
+        return sim_cpu + str(sim_args)
+
+    # Sim args
+    os.environ['HEXAGON_SIM_ARGS'] = create_sim(cpu_ver, sim_args)
+
+    target_str = create_target(cpu_ver)
+    args_list = target_str.split()
+    return _ffi_api.TargetCreate("hexagon", *args_list)
+
+
 def create(target_str):
     """Get a target given target string.
 
diff --git a/src/runtime/hexagon/README.md b/src/runtime/hexagon/README.md
new file mode 100644
index 000000000000..3b52b8321117
--- /dev/null
+++ b/src/runtime/hexagon/README.md
@@ -0,0 +1,76 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Hexagon backend runtime
+
+The Hexagon runtime is a part of the TVM runtime that facilitates communication between a host and a Hexagon device. There are two types of host/device arrangements that are supported:
+- X86/Linux host running Hexagon simulator,
+- Android/AArch64 host running on a physical device containing a Hexagon module (i.e. CDSP or ADSP).
+
+The TVM runtime that contains the Hexagon runtime is the one executing on the host.  In either case, a separate TVM runtime (i.e. the `libtvm_runtime.so` library) must also be compiled for execution on Hexagon.
+
+The prerequisite is to have Hexagon SDK installed, preferably version 3.5.0.
+
+### Compiling TVM runtime for x86
+
+This will use Hexagon simulator, which is provided in the Hexagon SDK.
+
+When configuring TVM (cmake), set the following variables:
+```
+USE_LLVM=llvm-config
+USE_HEXAGON_DEVICE=sim
+USE_HEXAGON_SDK=/path/to/sdk
+```
+
+Set the C/C++ compiler to `clang`, and pass `-DCMAKE_CXX_FLAGS='-stdlib=libc++'` to the cmake command.
+
+You can then build the entire TVM with the usual command (e.g. `make`).
+
+### Compiling TVM runtime for Android
+
+This will use the FastRPC mechanism to communicate between the AArch64 host and Hexagon.
+
+When configuring TVM (cmake), set the following variables:
+```
+USE_LLVM=llvm-config
+USE_HEXAGON_DEVICE=target
+USE_HEXAGON_SDK=/path/to/sdk
+```
+
+You will need the Android clang toolchain to compile the runtime.  It is provided in Android NDK r19 or newer.
+
+Set the C/C++ compiler to the Android clang for aarch64, and pass `-DCMAKE_CXX_FLAGS='-stdlib=libc++'` to the cmake command.
+
+Only build the `runtime` component of TVM (e.g. `make runtime`); building the entire TVM will not work.
+
+### Compiling TVM runtime for Hexagon
+
+The TVM runtime executing on Hexagon does not need to have support for Hexagon device in it (as it is only for communication between host and Hexagon device). In fact, it's only needed for basic services (like thread control), and so it should not contain support for any devices.
+
+When configuring TVM (cmake), set the following variables:
+```
+USE_LLVM=OFF
+USE_HEXAGON_DEVICE=OFF
+USE_HEXAGON_SDK=/path/to/sdk
+```
+
+Please note that while support for a Hexagon device is disabled, the Hexagon SDK is still needed and the path to it needs to be passed to cmake.
+
+Set the C/C++ compiler to `hexagon-clang` (included in the Hexagon SDK), and set `CMAKE_CXX_FLAGS='-stdlib=libc++'`.
+
+As in the case of Android, only build the `runtime` component (e.g.  `make runtime`).
+
diff --git a/src/runtime/hexagon/hexagon_device_api.cc b/src/runtime/hexagon/hexagon_device_api.cc
new file mode 100644
index 000000000000..d88e6d7284a3
--- /dev/null
+++ b/src/runtime/hexagon/hexagon_device_api.cc
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#include "hexagon_module.h"
+
+namespace tvm {
+namespace runtime {
+
+/*!
+ * \brief DeviceAPI implementation registered as "device_api.hexagon".
+ *
+ * The out-of-class definitions in this file forward allocation, free and
+ * copy requests to the hexagon::Device object returned by
+ * hexagon::Device::Global().
+ *
+ * NOTE(review): the template arguments of std::shared_ptr and
+ * std::make_shared in Global() appear to have been stripped from this copy
+ * of the patch -- restore them before applying.
+ */
+class HexagonDeviceAPI : public DeviceAPI {
+ public:
+  void SetDevice(TVMContext ctx) final;
+  void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) final;
+  void* AllocDataSpace(TVMContext ctx, size_t nbytes, size_t alignment,
+                       DLDataType type_hint) final;
+  void FreeDataSpace(TVMContext ctx, void* ptr) final;
+  void CopyDataFromTo(const void* from, size_t from_offset, void* to,
+                      size_t to_offset, size_t num_bytes, TVMContext ctx_from,
+                      TVMContext ctx_to, DLDataType type_hint,
+                      TVMStreamHandle stream) final;
+  void StreamSync(TVMContext ctx, TVMStreamHandle stream) final;
+  void* AllocWorkspace(TVMContext ctx, size_t nbytes,
+                       DLDataType type_hint = {}) final;
+  void FreeWorkspace(TVMContext ctx, void* ptr) final;
+
+  /*! \brief Return the shared instance, constructed on first call. */
+  static const std::shared_ptr& Global() {
+    static std::shared_ptr inst =
+        std::make_shared();
+    return inst;
+  }
+};
+
+// HexagonDeviceAPI.
+
+// No-op: the body is empty, so ctx is ignored.
+inline void HexagonDeviceAPI::SetDevice(TVMContext ctx) {}
+
+// Answer device attribute queries. Only kExist is handled (reported as 1);
+// for every other kind *rv is left untouched.
+inline void HexagonDeviceAPI::GetAttr(TVMContext ctx, DeviceAttrKind kind,
+                                      TVMRetValue* rv) {
+  if (kind != kExist) {
+    return;
+  }
+  *rv = 1;
+}
+
+// Allocate nbytes with the requested alignment through the global
+// hexagon::Device. ctx.device_id is validated first; type_hint is unused.
+inline void* HexagonDeviceAPI::AllocDataSpace(TVMContext ctx, size_t nbytes,
+                                              size_t alignment,
+                                              DLDataType type_hint) {
+  CHECK(hexagon::Device::ValidateDeviceId(ctx.device_id));
+  const auto& device = hexagon::Device::Global();
+  return device->Alloc(nbytes, alignment);
+}
+
+// Release ptr through the global hexagon::Device after validating
+// ctx.device_id.
+inline void HexagonDeviceAPI::FreeDataSpace(TVMContext ctx, void* ptr) {
+  CHECK(hexagon::Device::ValidateDeviceId(ctx.device_id));
+  const auto& device = hexagon::Device::Global();
+  device->Free(ptr);
+}
+
+// Copy num_bytes from (from + from_offset) to (to + to_offset).
+//
+// Handled combinations:
+//   CPU     -> CPU      memmove
+//   Hexagon -> Hexagon  Device::CopyDeviceToDevice (device ids must match)
+//   CPU     -> Hexagon  Device::CopyHostToDevice
+//   Hexagon -> CPU      Device::CopyDeviceToHost
+// type_hint and stream are not used.
+//
+// NOTE(review): the template arguments of the static_cast/reinterpret_cast
+// expressions below appear to have been stripped from this copy of the
+// patch -- restore them before applying.
+inline void HexagonDeviceAPI::CopyDataFromTo(
+    const void* from, size_t from_offset, void* to, size_t to_offset,
+    size_t num_bytes, TVMContext ctx_from, TVMContext ctx_to,
+    DLDataType type_hint, TVMStreamHandle stream) {
+  const char* src = static_cast(from) + from_offset;
+  char* dst = static_cast(to) + to_offset;
+
+  // True when the pointer value is unchanged after truncation to 32 bits.
+  // Device-side pointers are checked with this before each device copy.
+  auto Is32bit = [](const void* p) {
+    return p == reinterpret_cast(uint32_t(uintptr_t(p)));
+  };
+  // Presumably silences an unused-variable warning in builds where CHECK
+  // compiles to nothing -- TODO confirm.
+  (void)Is32bit;
+
+  if (ctx_from.device_type == ctx_to.device_type) {
+    if (ctx_from.device_type == kDLCPU) {
+      memmove(dst, src, num_bytes);
+    } else if (static_cast(ctx_from.device_type) == kDLHexagon) {
+      CHECK(hexagon::Device::ValidateDeviceId(ctx_from.device_id));
+      CHECK_EQ(ctx_from.device_id, ctx_to.device_id);
+      CHECK(Is32bit(dst) && Is32bit(src));
+      hexagon::Device::Global()->CopyDeviceToDevice(dst, src, num_bytes);
+    }
+    // Any other pair of identical device types falls through here and
+    // nothing is copied.
+  } else {
+    if (ctx_from.device_type == kDLCPU) {
+      // Host -> Hexagon.
+      CHECK_EQ(static_cast(ctx_to.device_type), kDLHexagon);
+      CHECK(Is32bit(dst));
+      CHECK(hexagon::Device::ValidateDeviceId(ctx_to.device_id));
+      hexagon::Device::Global()->CopyHostToDevice(dst, src, num_bytes);
+    } else {
+      // Hexagon -> host.
+      CHECK_EQ(static_cast(ctx_from.device_type), kDLHexagon);
+      CHECK_EQ(ctx_to.device_type, kDLCPU);
+      CHECK(Is32bit(src));
+      CHECK(hexagon::Device::ValidateDeviceId(ctx_from.device_id));
+      hexagon::Device::Global()->CopyDeviceToHost(dst, src, num_bytes);
+    }
+  }
+}
+
+// No-op: the body is empty, so ctx and stream are ignored.
+inline void HexagonDeviceAPI::StreamSync(TVMContext ctx,
+                                         TVMStreamHandle stream) {}
+
+// Allocate a temporary workspace of nbytes.
+//
+// A type_hint.code of 100 routes the request to Device::AllocVtcm with an
+// alignment of min(nbytes, 2048); every other type code is handled by the
+// base-class DeviceAPI::AllocWorkspace.
+inline void* HexagonDeviceAPI::AllocWorkspace(TVMContext ctx, size_t nbytes,
+                                              DLDataType type_hint) {
+  CHECK(hexagon::Device::ValidateDeviceId(ctx.device_id));
+  if (type_hint.code != 100) {
+    return DeviceAPI::AllocWorkspace(ctx, nbytes, type_hint);
+  }
+  const size_t vtcm_align = std::min(nbytes, 2048lu);
+  return hexagon::Device::Global()->AllocVtcm(nbytes, vtcm_align);
+}
+
+// Release a workspace after validating ctx.device_id; the actual release is
+// delegated to the base-class DeviceAPI::FreeWorkspace.
+// NOTE(review): AllocWorkspace can return memory obtained from
+// Device::AllocVtcm, but nothing here calls Device::FreeVtcm -- confirm that
+// the base-class path releases such pointers correctly.
+inline void HexagonDeviceAPI::FreeWorkspace(TVMContext ctx, void* ptr) {
+  CHECK(hexagon::Device::ValidateDeviceId(ctx.device_id));
+  DeviceAPI::FreeWorkspace(ctx, ptr);
+}
+
+// Register the packed function "device_api.hexagon"; it returns a raw,
+// non-owning DeviceAPI pointer to the shared HexagonDeviceAPI instance.
+TVM_REGISTER_GLOBAL("device_api.hexagon")
+    .set_body([](TVMArgs args, TVMRetValue* rv) {
+      DeviceAPI* hexagon_api = HexagonDeviceAPI::Global().get();
+      *rv = hexagon_api;
+    });
+}  // namespace runtime
+}  // namespace tvm
+
+// Hexagon-specific runtime functions to allocate/deallocate workspaces
+// in VTCM.
+extern "C" {
+// Allocate nbytes from VTCM via the global hexagon::Device. The alignment
+// passed on is never smaller than 2048.
+void* HexagonBackendAllocateVTCM(uint32_t nbytes, uint32_t align) {
+  using tvm::runtime::hexagon::Device;
+  return Device::Global()->AllocVtcm(nbytes, std::max(align, 2048u));
+}
+// Hand ptr back to the global hexagon::Device through FreeVtcm.
+void HexagonBackendFreeVTCM(void* ptr) {
+  using tvm::runtime::hexagon::Device;
+  Device::Global()->FreeVtcm(ptr);
+}
+}
diff --git a/src/runtime/hexagon/hexagon_module.cc b/src/runtime/hexagon/hexagon_module.cc
new file mode 100644
index 000000000000..e14843688b73
--- /dev/null
+++ b/src/runtime/hexagon/hexagon_module.cc
@@ -0,0 +1,561 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "hexagon_module.h"
+
+#ifdef __ANDROID__
+#include 
+#endif
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "../file_util.h"
+#include "../meta_data.h"
+
+namespace tvm {
+namespace runtime {
+
+hexagon::Device::~Device() {}
+
+namespace hexagon {
+
+/*!
+ * \brief Function argument locations according to the Hexagon ABI.
+ *
+ * In order to invoke a function whose arguments are in TVMArgs list, at
+ * some point before branching to the function's address, these arguments
+ * need to be loaded into locations (registers or stack) specified by the
+ * corresponding ABI.
+ * When a host wants to call a function on Hexagon, the host will identify
+ * how each element of the TVMArgs list will be passed to the Hexagon
+ * function. This class is a description of which values should go into
+ * registers, and which values should be on stack. Right before the call
+ * this class will be serialized and transferred over to the Hexagon side.
+ * The code running on Hexagon will then execute the argument placement
+ * and invoke the function.
+ */
+struct ArgLayout {
+  std::vector Scalar; /*!< Values going into registers, maximum  */
+                                /*!< 6, including dummy values for skipped */
+                                /*!< registers.                            */
+  std::vector Stack;  /*!< Values going on stack, including      */
+                                /*!< dummy values for padding.             */
+  // There are no vector types at this time.
+
+  /*!
+   * \brief Alignment of type T on Hexagon.
+   */
+  template 
+  static constexpr unsigned align_of();
+  /*!
+   * \brief Size of type T on Hexagon.
+   */
+  template 
+  static constexpr unsigned size_of();
+
+  /*!
+   * \brief Add a value of type T to the layout.
+   */
+  template 
+  void Push(const T& v);
+
+ private:
+  /*!
+   * \brief Add raw data to the layout.
+   * \param v         Pointer to the raw data as an array of 32-bit words.
+   * \param t_size    Number of bytes to add.
+   * \param t_align   Required alignment of the data on Hexagon.
+   */
+  void Push(uint32_t* v, unsigned t_size, unsigned t_align);
+};
+
+template <>
+constexpr unsigned ArgLayout::align_of() {
+  return 4;
+}
+template <>
+constexpr unsigned ArgLayout::align_of() {
+  return 4;
+}
+template <>
+constexpr unsigned ArgLayout::align_of() {
+  return 4;
+}
+template <>
+constexpr unsigned ArgLayout::align_of() {
+  return 4;
+}
+template <>
+constexpr unsigned ArgLayout::align_of() {
+  return 8;
+}
+template <>
+constexpr unsigned ArgLayout::align_of() {
+  return 8;
+}
+template <>
+constexpr unsigned ArgLayout::align_of() {
+  return 8;
+}
+template <>
+constexpr unsigned ArgLayout::align_of() {
+  return 4;
+}
+
+template 
+constexpr unsigned ArgLayout::align_of() {
+  // The static_assertion should depend on T so that it's only checked
+  // after instantiation.
+  static_assert((sizeof(T), false), "Implement align_of for this type");
+  return 0;
+}
+
+template 
+constexpr unsigned ArgLayout::size_of() {
+  return ArgLayout::align_of();
+}
+
+template 
+void ArgLayout::Push(const T& v) {
+  static_assert(std::is_scalar::value, "T must be a scalar");
+  constexpr unsigned T_size = size_of();
+  // The reason for this assertion is to avoid sign-extensions here:
+  // an extra bit of information would be required to determine whether
+  // a sign- or a zero-extension is needed.
+  static_assert(T_size >= 4, "Type should be of size that is at least 4");
+  union {
+    uint32_t v[(T_size + 3) / 4];
+    T t;
+  } u;
+
+  u.t = v;
+  Push(u.v, T_size, align_of());
+}
+
+void ArgLayout::Push(uint32_t* v, unsigned t_size, unsigned t_align) {
+  // t_size == 4 and t_size == 8 can be passed in scalar registers.
+  bool InReg = false;
+  if (t_size == 4) {
+    if (Scalar.size() < 6) {
+      Scalar.push_back(v[0]);
+      InReg = true;
+    }
+  } else if (t_size == 8) {
+    // An 8-byte value starts at an even register index: pad if needed.
+    unsigned cs = Scalar.size();
+    if (cs <= 4) {
+      // There is room in the scalar registers.
+      if (cs & 1) Scalar.push_back(0u);
+      Scalar.push_back(v[0]);
+      Scalar.push_back(v[1]);
+      InReg = true;
+    }
+  }
+
+  if (!InReg) {
+    // Allocate on stack.
+    CHECK_EQ((t_align & (t_align - 1)), 0)
+        << "Alignment should be a power of 2";
+    CHECK_GE(t_align, 4) << "Alignment should be at least 4";
+    // Stack holds 32-bit words, so sizes and padding below are in words.
+    unsigned s_size = Stack.size();
+    unsigned s_align = t_align / 4;  // Alignment of T in words on the stack.
+    unsigned pad = ((s_size + s_align - 1) / s_align) * s_align - s_size;
+    Stack.insert(Stack.end(), pad, 0u);  // pad is already a word count.
+    Stack.insert(Stack.end(), v, v + t_size / 4);
+  }
+}
+
+}  // namespace hexagon
+
+class HexagonModuleNode final : public runtime::ModuleNode {
+ public:
+  HexagonModuleNode(std::string data, std::string fmt,
+                    std::unordered_map fmap,
+                    std::string asm_str, std::string obj_str,
+                    std::string ir_str, std::string bc_str,
+                    const std::set& packed_c_abi)
+      : hexagon_device_(hexagon::Device::Global()),
+        data_(data),
+        fmt_(fmt),
+        fmap_(fmap),
+        asm_(asm_str),
+        obj_(obj_str),
+        ir_(ir_str),
+        bc_(bc_str),
+        packed_c_abi_funcs_(packed_c_abi) {
+    dl_handle_ = hexagon_device_->Load(data, fmt);
+  }
+  ~HexagonModuleNode() {
+    if (dl_handle_) {
+      hexagon_device_->Unload(dl_handle_);
+    }
+  }
+
+  PackedFunc GetFunction(const std::string& name,
+                         const ObjectPtr& sptr_to_self) final;
+
+  const char* type_key() const final { return "hexagon"; }
+
+  void SaveToFile(const std::string& file_name,
+                  const std::string& format) final {
+    std::string fmt = runtime::GetFileFormat(file_name, format);
+    if (fmt == "so" || fmt == "dll" || fmt == "hexagon") {
+      std::string meta_file = GetMetaFilePath(file_name);
+      SaveMetaDataToFile(meta_file, fmap_);
+      std::string c = "cp " + data_ + " " + file_name;
+      CHECK(std::system(c.c_str()) == 0) << "Cannot create " + file_name;
+    } else if (fmt == "s" || fmt == "asm") {
+      CHECK(!asm_.empty()) << "Assembler source not available";
+      SaveBinaryToFile(file_name, asm_);
+    } else if (fmt == "o" || fmt == "obj") {
+      CHECK(!obj_.empty()) << "Object data not available";
+      SaveBinaryToFile(file_name, obj_);
+    } else if (fmt == "ll") {
+      CHECK(!ir_.empty()) << "LLVM IR source not available";
+      SaveBinaryToFile(file_name, ir_);
+    } else if (fmt == "bc") {
+      CHECK(!bc_.empty()) << "LLVM IR bitcode not available";
+      SaveBinaryToFile(file_name, bc_);
+    } else {
+      LOG(FATAL) << "HexagonModuleNode::SaveToFile: unhandled format `" << fmt
+                 << "'";
+    }
+  }
+  void SaveToBinary(dmlc::Stream* stream) final {
+    stream->Write(fmt_);
+    stream->Write(fmap_);
+    stream->Write(data_);
+  }
+
+ private:
+  void CallRemotePackedCABI(void* func_ptr, const TVMArgs& args,
+                            TVMRetValue* rv) const;
+  void CallRemoteDirect(void* func_ptr, const TVMArgs& args,
+                        TVMRetValue* rv) const;
+  void RemapArgs(const TVMArgs& args,
+                 std::vector& values,              // NOLINT(*)
+                 std::vector& type_codes,               // NOLINT(*)
+                 std::vector& remote_tensors) const;  // NOLINT(*)
+  void* CreateRemoteTensor(const DLTensor* T) const;
+  hexagon::ArgLayout BuildArgLayout(const TVMArgs& Aa) const;
+
+  std::shared_ptr hexagon_device_;
+  void* dl_handle_ = nullptr;
+  std::string data_;
+  std::string fmt_;
+  std::unordered_map fmap_;
+  std::string asm_;
+  std::string obj_;
+  std::string ir_;
+  std::string bc_;
+  std::set packed_c_abi_funcs_;
+};
+
+void HexagonModuleNode::CallRemotePackedCABI(void* func_ptr,
+                                             const TVMArgs& args,
+                                             TVMRetValue* rv) const {
+  // Remap all arguments, creating remote DLTensors.
+  std::vector values;
+  std::vector codes;
+  std::vector remote_tensors;
+
+  RemapArgs(args, values, codes, remote_tensors);
+  // The prototype of packed C function is
+  //   int (TVMValue* args, int* type_codes, int num_args,
+  //        TVMValue* ret_value, int* ret_code)
+  // The pointers must point to allocated space, the return information
+  // will be filled in by the callee.
+  // Allocate remote buffer to hold:
+  // 1. argument TVMValues,
+  // 2. return TVMValue,
+  // 3. argument type codes,
+  // 4. return type code.
+
+  int num_args = args.size();
+  int values_size = num_args * sizeof(TVMValue);
+  int codes_size = num_args * sizeof(int);
+  void* remote = hexagon_device_->Alloc(
+      values_size + sizeof(TVMValue) + codes_size + sizeof(int), 8);
+
+  // Copy all argument TVMValues to the remote space.
+  void* remote_values = remote;
+  void* remote_ret_value = static_cast(remote_values) + values_size;
+  void* remote_codes = static_cast(remote_ret_value) + sizeof(TVMValue);
+  void* remote_ret_code = static_cast(remote_codes) + codes_size;
+  hexagon_device_->CopyHostToDevice(remote_values, values.data(), values_size);
+  hexagon_device_->CopyHostToDevice(remote_codes, codes.data(), codes_size);
+
+  // Call the function: construct temporary values/codes and pass them through
+  // the arg layout building to prepare for the actual remote call.
+  TVMValue temp_values[5];
+  temp_values[0].v_handle = remote_values;
+  temp_values[1].v_handle = remote_codes;
+  temp_values[2].v_int64 = num_args;
+  temp_values[3].v_handle = remote_ret_value;
+  temp_values[4].v_handle = remote_ret_code;
+  int temp_codes[5] = {kTVMOpaqueHandle, kTVMOpaqueHandle, kDLInt,
+                       kTVMOpaqueHandle, kTVMOpaqueHandle};
+  TVMArgs temp_args(temp_values, temp_codes, 5);
+  hexagon::ArgLayout as = BuildArgLayout(temp_args);
+  hexagon_device_->Call(func_ptr, as.Scalar.data(), as.Scalar.size(),
+                        as.Stack.data(), as.Stack.size());
+
+  // TODO(kparzysz-quic): copy return value back
+  std::for_each(remote_tensors.begin(), remote_tensors.end(),
+                [this](void* t) { hexagon_device_->Free(t); });
+  hexagon_device_->Free(remote);
+}
+
+void HexagonModuleNode::CallRemoteDirect(void* func_ptr, const TVMArgs& args,
+                                         TVMRetValue* rv) const {
+  hexagon::ArgLayout as = BuildArgLayout(args);
+  hexagon_device_->Call(func_ptr, as.Scalar.data(), as.Scalar.size(),
+                        as.Stack.data(), as.Stack.size());
+}
+
+PackedFunc HexagonModuleNode::GetFunction(
+    const std::string& name, const ObjectPtr& sptr_to_self) {
+  auto f = fmap_.find(name);
+  if (f == fmap_.end()) return PackedFunc(nullptr);
+
+  // Get function pointer from device.
+  void* pf = hexagon_device_->Resolve(name);
+  // The cast result and the original share ownership. Do the cast here
+  // so that sptr_to_self can be destroyed (i.e. "func" will only have
+  // one shared pointer to HexagonModuleNode).
+  auto sref = ObjectRef(sptr_to_self);
+
+  if (packed_c_abi_funcs_.count(name)) {
+    // Calling packed C func, follow the TVMBackendPackedCFunc prototype.
+    return PackedFunc([pf, sref](TVMArgs args, TVMRetValue* rv) {
+      const auto* hm = sref.as();
+      hm->CallRemotePackedCABI(pf, args, rv);
+    });
+  } else {
+    // Direct call to a non-packed-C function.
+    return PackedFunc([pf, sref](TVMArgs args, TVMRetValue* rv) {
+      const auto* hm = sref.as();
+      hm->CallRemoteDirect(pf, args, rv);
+    });
+  }
+}
+
+void HexagonModuleNode::RemapArgs(const TVMArgs& args,
+                                  std::vector& values,
+                                  std::vector& type_codes,
+                                  std::vector& remote_tensors) const {
+  for (unsigned i = 0, e = args.size(); i != e; ++i) {
+    const TVMArgValue& a = args[i];
+
+    switch (unsigned tc = a.type_code()) {
+      case kTVMNDArrayHandle:
+      case kTVMDLTensorHandle: {
+        DLTensor* t = static_cast(a);
+        CHECK(TVMDeviceExtType(t->ctx.device_type) == kDLHexagon);
+        TVMValue v;
+        v.v_handle = CreateRemoteTensor(t);  // device-side copy of *t
+        remote_tensors.push_back(v.v_handle);
+        values.push_back(v);
+        type_codes.push_back(tc);
+        break;
+      }
+
+      default:  // pass every other argument through unchanged
+        values.push_back(a.value());
+        type_codes.push_back(tc);
+        break;
+    }
+  }
+}
+
+void* HexagonModuleNode::CreateRemoteTensor(const DLTensor* t) const {
+  /*
+    Layout of the DLTensor structure on Hexagon.
+
+    DLTensor:                       Size  offset
+      data              void*          4       0
+      ctx.device_type   enum           1       4
+      (pad)                            3       5
+      ctx.device_id     int            4       8
+      ndim              int            4      12
+      dtype.code        uint8_t        1      16
+      dtype.bits        uint8_t        1      17
+      dtype.lanes       uint16_t       2      18
+      shape             int64_t*       4      20
+      strides           int64_t*       4      24
+      (pad)                            4      28
+      byte_offset       uint64_t       8      32
+      .. end ................................ 40
+  */
+  struct __attribute__((packed)) HexagonDLTensor {
+    uint32_t data;
+    uint8_t ctx_device_type;
+    uint8_t pad0[3];  // MUST BE ZERO!
+    int32_t ctx_device_id;
+    int32_t ndim;
+    uint8_t dtype_code;
+    uint8_t dtype_bits;
+    uint16_t dtype_lanes;
+    uint32_t shape;
+    uint32_t strides;
+    uint8_t pad1[4];
+    uint64_t byte_offset;
+  };
+
+  constexpr uint32_t size_ht = sizeof(HexagonDLTensor);
+  static_assert(size_ht == 40, "HexagonDLTensor should be 40 bytes");
+
+  // Shape and strides will contain ndim elements of size sizeof(uint64_t)
+  // each. Allocate them after the main structure.
+  int ndim = t->ndim;
+  uint32_t size_s = 8 * ndim;  // sizeof(uint64_t)*ndim
+  uint32_t size_ss = t->strides ? 2 * size_s : size_s;
+  void* remote = hexagon_device_->Alloc(size_ht + size_ss, 8);
+  uint32_t remote_as_int = reinterpret_cast(remote);
+  void* remote_ss = reinterpret_cast(remote_as_int + size_ht);
+
+  HexagonDLTensor local = {
+      .data = static_cast(reinterpret_cast(t->data)),
+      .ctx_device_type = uint8_t(t->ctx.device_type),
+      .pad0 = {0, 0, 0},
+      .ctx_device_id = t->ctx.device_id,
+      .ndim = t->ndim,
+      .dtype_code = t->dtype.code,
+      .dtype_bits = t->dtype.bits,
+      .dtype_lanes = t->dtype.lanes,
+      .shape = remote_as_int + size_ht,
+      .strides = t->strides ? remote_as_int + size_ht + size_s : 0u,
+      .byte_offset = t->byte_offset};
+
+  std::vector local_ss(size_ss / 8);
+  for (int i = 0; i != ndim; ++i) local_ss[i] = t->shape[i];
+  if (t->strides) {
+    for (int i = 0; i != ndim; ++i) local_ss[ndim + i] = t->strides[i];
+  }
+
+  hexagon_device_->CopyHostToDevice(remote, &local, sizeof local);
+  hexagon_device_->CopyHostToDevice(remote_ss, local_ss.data(), size_ss);
+  return remote;
+}
+
+hexagon::ArgLayout HexagonModuleNode::BuildArgLayout(const TVMArgs& As) const {
+  hexagon::ArgLayout Args;
+
+  for (unsigned i = 0, e = As.size(); i != e; ++i) {
+    const TVMArgValue& A = As[i];
+    unsigned TC = A.type_code();
+    switch (TC) {
+      // Treat all integers as 32-bit values.
+      case kDLInt:
+      case kDLUInt:
+        // KLUDGE: There is no distinction between 32- and 64-bit integer
+        // types, so there is no way to tell if the value being passed needs
+        // one or two registers. Assume that all integers are 32-bit, and
+        // simply abort if the actual value does not fit.
+        CHECK_EQ(static_cast(A), static_cast(A));
+        Args.Push(static_cast(A));
+        break;
+      // 64-bit values
+      case kDLFloat:
+        Args.Push(static_cast(A));
+        break;
+
+      case kTVMOpaqueHandle:
+      case kTVMNullptr:
+      case kTVMObjectHandle:
+      case kTVMModuleHandle:
+      case kTVMPackedFuncHandle:
+        Args.Push(static_cast(A));
+        break;
+
+      case kTVMNDArrayHandle:
+      case kTVMDLTensorHandle:
+        LOG(FATAL) << __func__ << ": cannot handle DLTensor*, code:" << TC;
+        break;  // not reached; avoids implicit fallthrough into default
+      default:
+        LOG(FATAL) << __func__ << ": unhandled type code:" << TC;
+        break;
+    }
+  }
+
+  return Args;
+}
+
+Module HexagonModuleCreate(std::string data, std::string fmt,
+                           std::unordered_map fmap,
+                           std::string asm_str, std::string obj_str,
+                           std::string ir_str, std::string bc_str,
+                           const std::set& packed_c_abi) {
+  auto n = make_object(data, fmt, fmap, asm_str, obj_str,
+                                          ir_str, bc_str, packed_c_abi);
+  return Module(n);
+}
+
+// Load module from file.
+Module HexagonModuleLoadFile(const std::string& file_name,
+                             const std::string& format) {
+  std::string data = file_name;
+  std::unordered_map fmap;
+  std::string fmt = GetFileFormat(file_name, format);
+  std::string meta_file = GetMetaFilePath(file_name);
+  LoadMetaDataFromFile(meta_file, &fmap);
+
+  std::string empty;
+  // This passes {} as the set of packed C functions. Won't work for
+  // standalone functions on target.
+  return HexagonModuleCreate(data, fmt, fmap, empty, empty, empty, empty, {});
+}
+
+namespace hexagon {
+
+std::shared_ptr Device::Global() {
+  // Declare device constructors.
+#ifdef __ANDROID__
+  std::shared_ptr CreateHexagonTarget(void);
+#else
+  std::shared_ptr CreateHexagonSimulator(void);
+#endif
+
+  static std::shared_ptr dev(
+#ifdef __ANDROID__
+      CreateHexagonTarget()
+#else
+      CreateHexagonSimulator()
+#endif
+  );  // NOLINT
+
+  return dev;
+}
+
+}  // namespace hexagon
+
+TVM_REGISTER_GLOBAL("runtime.module.loadfile_hexagon")
+    .set_body([](TVMArgs args, TVMRetValue* rv) {
+      *rv = HexagonModuleLoadFile(args[0], args[1]);
+    });
+
+}  // namespace runtime
+}  // namespace tvm
diff --git a/src/runtime/hexagon/hexagon_module.h b/src/runtime/hexagon/hexagon_module.h
new file mode 100644
index 000000000000..c9e23a77776e
--- /dev/null
+++ b/src/runtime/hexagon/hexagon_module.h
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_MODULE_H_
+#define TVM_RUNTIME_HEXAGON_HEXAGON_MODULE_H_
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "../meta_data.h"
+
+namespace tvm {
+namespace runtime {
+
+/*!
+ * \brief Create a Hexagon module from data.
+ * \param data          The module data.
+ * \param fmt           The format of the data, can be "obj".
+ * \param fmap          The function information map of each function.
+ * \param asm_str       String with the generated assembly source.
+ * \param obj_str       String with the object file data.
+ * \param ir_str        String with the disassembled LLVM IR source.
+ * \param bc_str        String with the bitcode LLVM IR.
+ * \param packed_c_abi  Set of names of functions using PackedC calling
+ *                      convention.
+ */
+Module HexagonModuleCreate(std::string data, std::string fmt,
+                           std::unordered_map fmap,
+                           std::string asm_str, std::string obj_str,
+                           std::string ir_str, std::string bc_str,
+                           const std::set& packed_c_abi);
+
+namespace hexagon {
+
+/*!
+ * \brief Low-level interface for communicating with Hexagon devices.
+ */
+class Device {
+ public:
+  /*!
+   * \brief Allocate memory on device.
+   * \param size    Requested size.
+   * \param align   Requested alignment.
+   * \return        Pointer (local to the device) of the allocated memory,
+   *                or nullptr if allocation failed.
+   */
+  virtual void* Alloc(unsigned size, unsigned align) = 0;
+  /*!
+   * \brief Release allocated memory on device.
+   * \param ptr     Pointer to memory previously allocated by \ref Alloc.
+   */
+  virtual void Free(void* ptr) = 0;
+  /*!
+   * \brief Allocate VTCM memory on device.
+   * \param size    Requested size.
+   * \param align   Requested alignment.
+   * \return        Pointer (local to the device) of the allocated memory,
+   *                or nullptr if allocation failed.
+   */
+  virtual void* AllocVtcm(unsigned size, unsigned align) = 0;
+  /*!
+   * \brief Release allocated VTCM memory on device.
+   * \param ptr     Pointer to memory previously allocated by \ref AllocVtcm.
+   */
+  virtual void FreeVtcm(void* ptr) = 0;
+  /*!
+   * \brief Copy a block of data on device to another location on the device.
+   * \param dst     Pointer (local to device) to the destination buffer.
+   * \param src     Pointer (local to device) of the source buffer.
+   * \param len     Number of bytes to copy.
+   */
+  virtual void CopyDeviceToDevice(void* dst, const void* src,
+                                  unsigned len) = 0;
+  /*!
+   * \brief Copy a block of data from device to host.
+   * \param host_dst  Pointer (local to host) to the destination buffer.
+   * \param src       Pointer (local to device) to the source buffer.
+   * \param len       Number of bytes to copy.
+   */
+  virtual void CopyDeviceToHost(void* host_dst, const void* src,
+                                unsigned len) = 0;
+  /*!
+   * \brief Copy a block of data from host to device.
+   * \param dst       Pointer (local to device) to the destination buffer.
+   * \param host_src  Pointer (local to host) to the source buffer.
+   * \param len       Number of bytes to copy.
+   */
+  virtual void CopyHostToDevice(void* dst, const void* host_src,
+                                unsigned len) = 0;
+  /*!
+   * \brief Load a module (typically a shared library) into device.
+   * \param data    Name of the shared library.
+   * \param fmt     Format of the library (currently ignored).
+   * \return        Pointer to the loaded module.
+   * \note Currently only one module can be loaded at any given time.
+   */
+  virtual void* Load(const std::string& data, const std::string& fmt) = 0;
+  /*!
+   * \brief Unload a module from device.
+   * \param mod     Pointer to a loaded module returned by \ref Load.
+   */
+  virtual void Unload(void* mod) = 0;
+  /*!
+   * \brief Find the address of an object in the currently loaded module.
+   * \param sym     Name of the object.
+   * \return Address of the located object, or nullptr if object was
+   *         not found.
+   */
+  virtual void* Resolve(const std::string& sym) = 0;
+  /*!
+   * \brief Invoke a function on device with given arguments.
+   * \param func    Address (local to device) of the function to call.
+   * \param scalar  Pointer to an array of 32-bit values that will be
+   *                passed via consecutive registers: r0..r5. This array
+   *                includes dummy values for skipped registers.
+   * \param sc_num  Number of values in the "scalar" array.
+   * \param stack   Pointer to an array of 32-bit values that will be
+   *                passed on the stack. This array includes dummy values
+   *                for padding.
+   * \param st_num  Number of values in the "stack" array.
+   */
+  virtual void Call(void* func, uint32_t* scalar, unsigned sc_num,
+                    uint32_t* stack, unsigned st_num) = 0;
+
+  virtual ~Device() = 0;
+
+  static std::shared_ptr Global();
+  static bool ValidateDeviceId(decltype(DLContext::device_id) device_id) {
+    // Only supporting a single device for now.
+    return device_id == 0;
+  }
+};
+
+}  // namespace hexagon
+
+}  // namespace runtime
+}  // namespace tvm
+#endif  // TVM_RUNTIME_HEXAGON_HEXAGON_MODULE_H_
diff --git a/src/runtime/hexagon/hexagon_posix.cc b/src/runtime/hexagon/hexagon_posix.cc
new file mode 100644
index 000000000000..627963f384f5
--- /dev/null
+++ b/src/runtime/hexagon/hexagon_posix.cc
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#if defined(__hexagon__)
+
+#include 
+#include 
+
+extern "C" {
+int posix_memalign(void** memptr, size_t alignment, size_t size)
+    __attribute__((nothrow));
+}
+
+__attribute__((nothrow)) int posix_memalign(void** memptr, size_t alignment,
+                                            size_t size) {
+  if (void* p = memalign(alignment, size)) {
+    *memptr = p;
+    return 0;
+  }
+
+  return ENOMEM;
+}
+#endif
diff --git a/src/runtime/hexagon/sim/hexagon_device_sim.cc b/src/runtime/hexagon/sim/hexagon_device_sim.cc
new file mode 100644
index 000000000000..63eed5eabd9c
--- /dev/null
+++ b/src/runtime/hexagon/sim/hexagon_device_sim.cc
@@ -0,0 +1,1419 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "../hexagon_module.h"
+#include "HexagonWrapper.h"
+#include "hexagon_sim_proto.h"
+
+namespace tvm {
+namespace runtime {
+namespace hexagon {
+
+static_assert(sizeof(HEX_VA_t) == sizeof(uint32_t),
+              "Hexagon VA must be uint32");
+
+template 
+struct unalign {
+  using type = struct { T value; } __attribute__((aligned(1), packed));
+};
+
+template 
+struct uint {
+  using type = void;
+};
+
+template <>
+struct uint<8> {
+  using type = uint64_t;
+};
+template <>
+struct uint<4> {
+  using type = uint32_t;
+};
+template <>
+struct uint<2> {
+  using type = uint16_t;
+};
+template <>
+struct uint<1> {
+  using type = uint8_t;
+};
+
+using string_list = std::deque;
+
+namespace detail {
+
+template 
+std::unique_ptr make_unique(Args... args) {
+  return std::unique_ptr(new T(std::forward(args)...));
+}
+template 
+std::unique_ptr make_unique(size_t size) {
+  using U = typename std::remove_extent::type;
+  return std::unique_ptr(new U[size]());
+}
+
+// Converter class to translate vector to char**. This relieves the
+// user from memory reallocation and copying.
+struct non_const_str {
+  non_const_str() {}
+  explicit non_const_str(const std::string& str)
+      : non_const_str(std::vector{str}) {}
+  explicit non_const_str(const std::vector& vec) {
+    for (const std::string& s : vec) {
+      auto c = detail::make_unique(s.size() + 1);
+      std::strncpy(c.get(), s.c_str(), s.size() + 1);
+      storage_.push_back(std::move(c));
+      pointers_.push_back(storage_.back().get());
+    }
+  }
+  non_const_str(non_const_str&& ncs) { *this = std::move(ncs); }
+  non_const_str& operator=(non_const_str&& ncs) {
+    if (this != &ncs) {
+      storage_.swap(ncs.storage_);    // swap keeps both objects consistent:
+      pointers_.swap(ncs.pointers_);  // pointers_ always matches storage_.
+    }
+    return *this;
+  }
+  size_t size() const { return pointers_.size(); }
+  operator char*() {
+    CHECK_EQ(pointers_.size(), 1);
+    return pointers_[0];
+  }
+  operator char* *() { return pointers_.data(); }
+
+ private:
+  std::vector pointers_;
+  std::vector> storage_;
+};
+
+using MaybeString = llvm::Optional;
+
+MaybeString front(const string_list& deq) {
+  return !deq.empty() ? MaybeString(deq.front()) : MaybeString();
+}
+
+MaybeString pop_front(string_list& deq) {  // NOLINT(*)
+  if (deq.empty()) return MaybeString();
+  std::string f = deq.front();
+  deq.pop_front();
+  return MaybeString(f);
+}
+
+llvm::Optional to_int(const MaybeString& str) {
+  auto none = llvm::Optional();  // result for any parse failure
+  if (str.hasValue()) {
+    try {
+      size_t pos;
+      int64_t val = std::stoll(*str, &pos, 0);
+      return pos == str->size() ? llvm::Optional(val) : none;
+    } catch (const std::logic_error&) {  // invalid_argument or out_of_range
+    }
+  }
+  return none;
+}
+
+llvm::Optional to_uint(const MaybeString& str) {
+  auto none = llvm::Optional();  // result for any parse failure
+  if (str.hasValue()) {
+    try {
+      size_t pos;
+      uint64_t val = std::stoull(*str, &pos, 0);
+      return pos == str->size() ? llvm::Optional(val) : none;
+    } catch (const std::logic_error&) {  // invalid_argument or out_of_range
+    }
+  }
+  return none;
+}
+
+llvm::Optional to_float(const MaybeString& str) {
+  auto none = llvm::Optional();  // result for any parse failure
+  if (str.hasValue()) {
+    try {
+      size_t pos;
+      float val = std::stof(*str, &pos);
+      return pos == str->size() ? llvm::Optional(val) : none;
+    } catch (const std::logic_error&) {  // invalid_argument or out_of_range
+    }
+  }
+  return none;
+}
+
+llvm::Optional<bool> to_bool(const MaybeString& str) {
+  // Accepts 0/1 and true/TRUE/false/FALSE; anything else gives an empty result.
+  const auto none = llvm::Optional<bool>();
+  if (auto num = to_int(str)) {
+    if (*num == 0 || *num == 1) return *num == 1;
+    return none;  // an integer, but neither 0 nor 1
+  }
+  if (str) {
+    if (*str == "true" || *str == "TRUE") return true;
+    if (*str == "false" || *str == "FALSE") return false;
+  }
+  return none;
+}
+
+template <typename T>  // optional (begin, end) pair of the same element type
+using MaybeRange = llvm::Optional<std::pair<T, T>>;
+
+template <typename T, llvm::Optional<T> Parse(const MaybeString&)>
+MaybeRange<T> to_range(const MaybeString& str) {
+  // Parses "<begin>-<end>", converting each side with Parse. The search for
+  // '-' starts at index 1, so a '-' in the first position is never treated as
+  // the separator (presumably to allow a signed <begin>). Returns an empty
+  // result when the separator is missing or either side fails to parse.
+  const auto none = MaybeRange<T>();
+  if (!str || str->empty()) return none;
+  const auto dash = str->find('-', 1);
+  if (dash == std::string::npos) return none;
+  const auto begin = Parse(str->substr(0, dash));
+  const auto end = Parse(str->substr(dash + 1));
+  if (!begin || !end) return none;
+  return std::make_pair(*begin, *end);
+}
+
+}  // namespace detail
+
+class HexagonSimulator final : public tvm::runtime::hexagon::Device {
+ public:
+  explicit HexagonSimulator(bool enable_queuing);
+  ~HexagonSimulator() final {}
+  void* Alloc(unsigned size, unsigned align) final;
+  void Free(void* ptr) final;
+  void* AllocVtcm(unsigned size, unsigned align) final;
+  void FreeVtcm(void* ptr) final;
+  void CopyDeviceToDevice(void* dst, const void* src, unsigned len) final;
+  void CopyDeviceToHost(void* host_dst, const void* src, unsigned len) final;
+  void CopyHostToDevice(void* dst, const void* host_src, unsigned len) final;
+  void* Load(const std::string& data, const std::string& fmt) final;
+  void Unload(void* mod) final;
+  void* Resolve(const std::string& sym) final;
+  void Call(void* func, uint32_t* scalar, unsigned sc_num, uint32_t* stack,
+            unsigned st_num) final;
+
+  static std::string to_string(HEXAPI_Status status);
+
+ private:
+  static HEX_VA_t p2va(const void* p);  // pointer value -> HEX_VA_t (checked)
+  static void* va2p(HEX_VA_t va);       // HEX_VA_t -> pointer value
+
+  void CopyFromV(void* host_dst, HEX_VA_t src, unsigned len);
+  void CopyToV(HEX_VA_t dst, const void* host_src, unsigned len);
+
+  template <unsigned N>  // N = number of bytes moved in one simulator access
+  void CopyNToV(HEX_VA_t dst, const void* host_src);
+  template <unsigned N>  // N = number of bytes moved in one simulator access
+  void CopyNFromV(void* host_dst, HEX_VA_t src);
+
+  // NOLINTNEXTLINE(runtime/references)
+  void SendMsg(Message& m, const void* data, bool show_dbg);
+
+  std::string arch_;
+  std::unique_ptr<HexagonWrapper> sim_;  // NOTE(review): type restored, confirm
+  HEX_VA_t dispatch_v_, message_buffer_v_;
+  bool task_queuing_;
+
+  // Sim configuration routines.
+  bool Configure(string_list& opts);  // NOLINT(*)
+
+  bool HandleAHBBusPenalty(string_list& rest);      // NOLINT(*)
+  bool HandleAHBBusRatio(string_list& rest);        // NOLINT(*)
+  bool HandleAHBHighAddr(string_list& rest);        // NOLINT(*)
+  bool HandleAHBLowAddr(string_list& rest);         // NOLINT(*)
+  bool HandleAXI2BusPenalty(string_list& rest);     // NOLINT(*)
+  bool HandleAXI2BusRatio(string_list& rest);       // NOLINT(*)
+  bool HandleAXI2HighAddr(string_list& rest);       // NOLINT(*)
+  bool HandleAXI2LowAddr(string_list& rest);        // NOLINT(*)
+  bool HandleBuildTag(string_list& rest);           // NOLINT(*)
+  bool HandleBusPenalty(string_list& rest);         // NOLINT(*)
+  bool HandleBusRatio(string_list& rest);           // NOLINT(*)
+  bool HandleBusTrace(string_list& rest);           // NOLINT(*)
+  bool HandleBypassIdle(string_list& rest);         // NOLINT(*)
+  bool HandleConnectionTimeout(string_list& rest);  // NOLINT(*)
+  bool HandleCoprocTrace(string_list& rest);        // NOLINT(*)
+  bool HandleCoreDump(string_list& rest);           // NOLINT(*)
+  bool HandleCosimFile(string_list& rest);          // NOLINT(*)
+  bool HandleDCacheTrace(string_list& rest);        // NOLINT(*)
+  bool HandleDSPClock(string_list& rest);           // NOLINT(*)
+  bool HandleETMCFGBase(string_list& rest);         // NOLINT(*)
+  bool HandleGDBServ(string_list& rest);            // NOLINT(*)
+  bool HandleHVXLength(string_list& rest);          // NOLINT(*)
+  bool HandleICacheTrace(string_list& rest);        // NOLINT(*)
+  bool HandleL2CacheTrace(string_list& rest);       // NOLINT(*)
+  bool HandleL2CFGBase(string_list& rest);          // NOLINT(*)
+  bool HandleL2TCMBase(string_list& rest);          // NOLINT(*)
+  bool HandleMemFillRand(string_list& rest);        // NOLINT(*)
+  bool HandleMemFill(string_list& rest);            // NOLINT(*)
+  bool HandleMemTrace(string_list& rest);           // NOLINT(*)
+  bool HandleNullPtr(string_list& rest);            // NOLINT(*)
+  bool HandlePacketAnalyze(string_list& rest);      // NOLINT(*)
+  bool HandlePCFilter(string_list& rest);           // NOLINT(*)
+  bool HandlePCTraceMin(string_list& rest);         // NOLINT(*)
+  bool HandlePCTraceNano(string_list& rest);        // NOLINT(*)
+  bool HandlePCTrace(string_list& rest);            // NOLINT(*)
+  bool HandlePMUStatsFile(string_list& rest);       // NOLINT(*)
+  bool HandleProfile(string_list& rest);            // NOLINT(*)
+  bool HandleProfileTimeZero(string_list& rest);    // NOLINT(*)
+  bool HandleQuiet(string_list& rest);              // NOLINT(*)
+  bool HandleReconnect(string_list& rest);          // NOLINT(*)
+  bool HandleRTOS(string_list& rest);               // NOLINT(*)
+  bool HandleSimErr(string_list& rest);             // NOLINT(*)
+  bool HandleSimIn(string_list& rest);              // NOLINT(*)
+  bool HandleSimOut(string_list& rest);             // NOLINT(*)
+  bool HandleStackStart(string_list& rest);         // NOLINT(*)
+  bool HandleStallTrace(string_list& rest);         // NOLINT(*)
+  bool HandleStatsFile(string_list& rest);          // NOLINT(*)
+  bool HandleSubsystemBase(string_list& rest);      // NOLINT(*)
+  bool HandleSymFile(string_list& rest);            // NOLINT(*)
+  bool HandleTCM(string_list& rest);                // NOLINT(*)
+  bool HandleTCMHighAddr(string_list& rest);        // NOLINT(*)
+  bool HandleTCMLowAddr(string_list& rest);         // NOLINT(*)
+  bool HandleTimeFilterNS(string_list& rest);       // NOLINT(*)
+  bool HandleTiming(string_list& rest);             // NOLINT(*)
+  bool HandleUArchTrace(string_list& rest);         // NOLINT(*)
+  bool HandleUseFS(string_list& rest);              // NOLINT(*)
+  bool HandleV2PTranslation(string_list& rest);     // NOLINT(*)
+  bool HandleVerbose(string_list& rest);            // NOLINT(*)
+
+  using MaybeUInt64 = llvm::Optional<uint64_t>;
+  using MaybeUIntRange = std::pair<MaybeUInt64, MaybeUInt64>;  // (low, high)
+
+  bool should_parse_next(const string_list& rest);
+  llvm::Optional<HEXAPI_Interval> to_interval(const detail::MaybeString& str);
+  llvm::Optional<HEXAPI_TimingMode> to_timingmode(
+      const detail::MaybeString& str);
+  llvm::Optional<HEXAPI_VerboseMode> to_verbosemode(
+      const detail::MaybeString& str);
+  llvm::Optional<HEXAPI_Nullptr> to_nullptr(const detail::MaybeString& str);
+
+  MaybeUIntRange ahb_, axi2_;
+  llvm::Optional<uint32_t> debug_port_;  // NOTE(review): width restored, confirm
+  detail::non_const_str sim_dev_args_;
+
+  using OptionHandler = bool (HexagonSimulator::*)(string_list&);
+  static std::map<std::string, OptionHandler> opt_map_;  // option -> handler
+};
+
+decltype(HexagonSimulator::opt_map_) HexagonSimulator::opt_map_ = {  // option -> handler
+    {"--ahbbuspenalty", &HexagonSimulator::HandleAHBBusPenalty},
+    {"--ahbbusratio", &HexagonSimulator::HandleAHBBusRatio},
+    {"--ahb:highaddr", &HexagonSimulator::HandleAHBHighAddr},
+    {"--ahb:lowaddr", &HexagonSimulator::HandleAHBLowAddr},
+    {"--axi2buspenalty", &HexagonSimulator::HandleAXI2BusPenalty},
+    {"--axi2busratio", &HexagonSimulator::HandleAXI2BusRatio},
+    {"--axi2:highaddr", &HexagonSimulator::HandleAXI2HighAddr},
+    {"--axi2:lowaddr", &HexagonSimulator::HandleAXI2LowAddr},
+    {"-b", &HexagonSimulator::HandleBusTrace},
+    {"--build_tag", &HexagonSimulator::HandleBuildTag},
+    {"--buspenalty", &HexagonSimulator::HandleBusPenalty},
+    {"--busratio", &HexagonSimulator::HandleBusRatio},
+    {"--bustrace", &HexagonSimulator::HandleBusTrace},
+    {"--bypass_idle", &HexagonSimulator::HandleBypassIdle},
+    {"--connection_timeout", &HexagonSimulator::HandleConnectionTimeout},
+    {"--coproctrace", &HexagonSimulator::HandleCoprocTrace},
+    {"--coredump", &HexagonSimulator::HandleCoreDump},
+    {"--cosim_file", &HexagonSimulator::HandleCosimFile},
+    {"--dcachetrace", &HexagonSimulator::HandleDCacheTrace},
+    {"--dsp_clock", &HexagonSimulator::HandleDSPClock},
+    {"-E", &HexagonSimulator::HandleSimErr},
+    {"--etm_base", &HexagonSimulator::HandleETMCFGBase},
+    {"--etmcfg_base", &HexagonSimulator::HandleETMCFGBase},
+    {"--gdbserv", &HexagonSimulator::HandleGDBServ},
+    {"-G", &HexagonSimulator::HandleGDBServ},
+    {"--hvx_length", &HexagonSimulator::HandleHVXLength},
+    {"--icachetrace", &HexagonSimulator::HandleICacheTrace},
+    {"-I", &HexagonSimulator::HandleSimIn},
+    {"--l2cachetrace", &HexagonSimulator::HandleL2CacheTrace},
+    {"--l2cfg_base", &HexagonSimulator::HandleL2CFGBase},
+    {"--l2tcm_base", &HexagonSimulator::HandleL2TCMBase},
+    {"--memfill", &HexagonSimulator::HandleMemFill},
+    {"--memfill_rand", &HexagonSimulator::HandleMemFillRand},
+    {"--memtrace", &HexagonSimulator::HandleMemTrace},
+    {"-m", &HexagonSimulator::HandleMemTrace},
+    {"--nullptr", &HexagonSimulator::HandleNullPtr},
+    {"-O", &HexagonSimulator::HandleSimOut},
+    {"--packet_analyze", &HexagonSimulator::HandlePacketAnalyze},
+    {"--pcfilter", &HexagonSimulator::HandlePCFilter},
+    {"--pctrace", &HexagonSimulator::HandlePCTrace},
+    {"--pctrace_min", &HexagonSimulator::HandlePCTraceMin},
+    {"--pctrace_nano", &HexagonSimulator::HandlePCTraceNano},
+    {"-p", &HexagonSimulator::HandleProfile},
+    {"--pmu_statsfile", &HexagonSimulator::HandlePMUStatsFile},
+    {"--profile", &HexagonSimulator::HandleProfile},
+    {"--profile_timezero", &HexagonSimulator::HandleProfileTimeZero},
+    {"-q", &HexagonSimulator::HandleQuiet},
+    {"--quiet", &HexagonSimulator::HandleQuiet},
+    {"--reconnect", &HexagonSimulator::HandleReconnect},
+    {"--rtos", &HexagonSimulator::HandleRTOS},
+    {"-S", &HexagonSimulator::HandleStatsFile},
+    {"--sim_err", &HexagonSimulator::HandleSimErr},
+    {"--sim_in", &HexagonSimulator::HandleSimIn},
+    {"--sim_out", &HexagonSimulator::HandleSimOut},
+    {"--stackstart", &HexagonSimulator::HandleStackStart},
+    {"--stalltrace", &HexagonSimulator::HandleStallTrace},
+    {"--statsfile", &HexagonSimulator::HandleStatsFile},
+    {"--subsystem_base", &HexagonSimulator::HandleSubsystemBase},
+    {"--symfile", &HexagonSimulator::HandleSymFile},
+    {"--tcm", &HexagonSimulator::HandleTCM},
+    {"--tcm:highaddr", &HexagonSimulator::HandleTCMHighAddr},
+    {"--tcm:lowaddr", &HexagonSimulator::HandleTCMLowAddr},
+    {"-t", &HexagonSimulator::HandlePCTrace},
+    {"--timefilter_ns", &HexagonSimulator::HandleTimeFilterNS},
+    {"--timing", &HexagonSimulator::HandleTiming},
+    {"--uarchtrace", &HexagonSimulator::HandleUArchTrace},
+    {"-u", &HexagonSimulator::HandlePCTraceMin},
+    {"--usefs", &HexagonSimulator::HandleUseFS},
+    {"--v2p_translation", &HexagonSimulator::HandleV2PTranslation},
+    {"--verbose", &HexagonSimulator::HandleVerbose},
+};
+
+#define CHECKED_CALL(func, ...)                            \
+  do {                                                     \
+    HEXAPI_Status s = sim_->func(__VA_ARGS__);             \
+    CHECK_EQ(s, HEX_STAT_SUCCESS)                          \
+        << "HexagonSimulator: " #func " failed with code " \
+        << HexagonSimulator::to_string(s);                 \
+  } while (false)  // needs `sim_` in scope; CHECK_EQ fires unless HEX_STAT_SUCCESS
+
+inline HEX_VA_t HexagonSimulator::p2va(const void* p) {
+  uintptr_t u = reinterpret_cast<uintptr_t>(p);
+  HEX_VA_t va = static_cast<HEX_VA_t>(u);
+  CHECK_EQ(static_cast<uintptr_t>(va), u);  // value must round-trip unchanged
+  return va;
+}
+
+inline void* HexagonSimulator::va2p(HEX_VA_t va) {  // inverse of p2va
+  return reinterpret_cast<void*>(static_cast<uintptr_t>(va));
+}
+
+template <unsigned N, unsigned A>
+constexpr bool is_multiple_of() {
+  return (N / A) * A == N;  // true when N is a whole multiple of A
+}
+
+std::shared_ptr<tvm::runtime::hexagon::Device> CreateHexagonSimulator() {
+  return std::make_shared<HexagonSimulator>(/*enable_queuing=*/true);
+}
+
+template <unsigned N>  // writes one N-byte value from host memory to `dst`
+void HexagonSimulator::CopyNToV(HEX_VA_t dst, const void* host_src) {
+  using src_uint_t = typename unalign<typename uint<N>::type>::type;
+  auto* ps = reinterpret_cast<const src_uint_t*>(host_src);
+  CHECK_EQ(sim_->WriteVirtual(dst, -1u, N, ps->value), HEX_STAT_SUCCESS);
+}
+
+template <unsigned N>  // reads one N-byte value at `src` into host memory
+void HexagonSimulator::CopyNFromV(void* host_dst, HEX_VA_t src) {
+  typename uint<N>::type v;
+  CHECK_EQ(sim_->ReadVirtual(src, -1u, N, &v), HEX_STAT_SUCCESS);
+  // Store through the `unalign` wrapper type instead of a plain integer.
+  using dst_uint_t = typename unalign<typename uint<N>::type>::type;
+  auto* pd = reinterpret_cast<dst_uint_t*>(host_dst);
+  pd->value = v;
+}
+
+void HexagonSimulator::CopyToV(HEX_VA_t dst, const void* host_src,
+                               unsigned len) {
+  const uint8_t* src = static_cast<const uint8_t*>(host_src);
+  // Copy in 8-byte pieces, then finish the tail with one 4-, 2- and 1-byte step.
+  while (len >= 8) {
+    CopyNToV<8>(dst, src);
+    dst += 8;
+    src += 8;
+    len -= 8;
+  }
+  if (len >= 4) {
+    CopyNToV<4>(dst, src);
+    dst += 4;
+    src += 4;
+    len -= 4;
+  }
+  if (len >= 2) {
+    CopyNToV<2>(dst, src);
+    dst += 2;
+    src += 2;
+    len -= 2;
+  }
+  if (len >= 1) {
+    CopyNToV<1>(dst, src);
+    dst++;
+    src++;
+    len--;
+  }
+  CHECK_EQ(len, 0);
+}
+
+void HexagonSimulator::CopyFromV(void* host_dst, HEX_VA_t src, unsigned len) {
+  uint8_t* dst = static_cast<uint8_t*>(host_dst);
+  // Copy in 8-byte pieces, then finish the tail with one 4-, 2- and 1-byte step.
+  while (len >= 8) {
+    CopyNFromV<8>(dst, src);
+    dst += 8;
+    src += 8;
+    len -= 8;
+  }
+  if (len >= 4) {
+    CopyNFromV<4>(dst, src);
+    dst += 4;
+    src += 4;
+    len -= 4;
+  }
+  if (len >= 2) {
+    CopyNFromV<2>(dst, src);
+    dst += 2;
+    src += 2;
+    len -= 2;
+  }
+  if (len >= 1) {
+    CopyNFromV<1>(dst, src);
+    dst++;
+    src++;
+    len--;
+  }
+  CHECK_EQ(len, 0);
+}
+
+void HexagonSimulator::SendMsg(Message& m, const void* data, bool show_dbg) {
+  // Resumes the simulator until its next stop (which must be a breakpoint),
+  // optionally logging how many simulated cycles elapsed in between.
+  auto resume = [this](bool report_cycles) {
+    HEX_8u_t cycles0, cycles1;
+    if (report_cycles) {
+      CHECK_EQ(sim_->GetSimulatedCycleCount(&cycles0), HEX_STAT_SUCCESS);
+    }
+    HEX_4u_t result;
+    HEXAPI_CoreState core = sim_->Run(&result);
+    CHECK_EQ(core, HEX_CORE_BREAKPOINT);
+    if (report_cycles) {
+      CHECK_EQ(sim_->GetSimulatedCycleCount(&cycles1), HEX_STAT_SUCCESS);
+      LOG(INFO) << "host: execution took " << (cycles1 - cycles0) << " cycles";
+    }
+  };
+
+  // 1. Announce the payload size with a kMsgReq header.
+  Message r = {kMsgReq, m.len, 0u};
+  CopyToV(message_buffer_v_, &r, sizeof(r));
+  resume(false);
+
+  // 2. Read back the acknowledgement; it carries the payload address.
+  CopyFromV(&r, message_buffer_v_, sizeof(r));
+  CHECK_EQ(r.code, kMsgAck);
+  CHECK_GE(r.len, m.len);
+
+  // 3. Write the real header, then the payload at the acknowledged address.
+  m.va = r.va;
+  CopyToV(message_buffer_v_, &m, sizeof(m));
+  if (m.len > 0) CopyToV(r.va, data, m.len);
+  resume(show_dbg);
+
+  // 4. Fetch the reply header back into `m` for the caller.
+  CopyFromV(&m, message_buffer_v_, sizeof(m));
+  CHECK_EQ(m.code, kNone);
+}
+
+HexagonSimulator::HexagonSimulator(bool enable_queuing) {
+  task_queuing_ = enable_queuing;
+
+  // The simulator argument string is in the form:
+  //   <cpu_ver> <optional_arguments>
+  // The optional arguments are separated with spaces:
+  // Ex: --hvx_length 128 --memfill 0 --timing -m output.txt
+  const char* sim_args_env = std::getenv("HEXAGON_SIM_ARGS");
+  if (sim_args_env == nullptr) sim_args_env = "";
+  auto sim_args_iss = std::istringstream(std::string(sim_args_env));
+  using iterator = std::istream_iterator<std::string>;
+  auto sim_args = string_list(iterator(sim_args_iss), iterator());
+
+  std::string target_str =  // first token is the core version; default "v66"
+      !sim_args.empty() ? *detail::pop_front(sim_args) : std::string("v66");
+
+  arch_ = target_str;
+  sim_ =  // NOTE(review): wrapper type restored from stripped text, confirm
+      detail::make_unique<HexagonWrapper>(detail::non_const_str(target_str));
+  LOG(INFO) << "HexagonSimulator: Core version: " << arch_;
+
+  // Locate the sim_dev binary in PATH, or in the current working directory.
+  llvm::StringRef sim_dev = "sim_dev";
+  detail::MaybeString path_sim_dev =
+      llvm::sys::Process::FindInEnvPath("PATH", sim_dev);
+  if (!path_sim_dev) {
+    if (!llvm::sys::fs::exists(sim_dev)) {
+      LOG(FATAL) << "Cannot find sim_dev in PATH.";
+    }
+    path_sim_dev = sim_dev.str();
+  }
+
+  CHECKED_CALL(ConfigureExecutableBinary, path_sim_dev->c_str());
+
+  std::vector<std::string> app_args = {*path_sim_dev};
+  if (const char* ev = std::getenv("ADSP_LIBRARY_PATH")) {
+    app_args.push_back("-L");
+    app_args.push_back(ev);
+  }
+  sim_dev_args_ = detail::non_const_str(app_args);
+  CHECKED_CALL(ConfigureAppCommandLine, sim_dev_args_.size(), sim_dev_args_);
+
+  LOG(INFO) << "HexagonSimulator: ConfigureHVXLength: 128";
+  CHECKED_CALL(ConfigureHVXLength, 128);
+
+  Configure(sim_args);  // the remaining tokens are simulator options
+
+  CHECKED_CALL(EndOfConfiguration);
+  CHECKED_CALL(LoadExecutableBinary);
+  CHECKED_CALL(ReadSymbolValue, "dispatch", &dispatch_v_);
+  CHECKED_CALL(ReadSymbolValue, "message_buffer", &message_buffer_v_);
+  CHECKED_CALL(SetBreakpoint, dispatch_v_);
+
+  HEXAPI_CoreState core = HEX_CORE_RESET;
+
+  HEX_4u_t result;
+  core = sim_->Run(&result);
+  if (core != HEX_CORE_BREAKPOINT) {
+    LOG(FATAL) << "HexagonSimulator: Run not stopped on breakpoint, "
+                  "code="
+               << static_cast<int>(core);
+  }
+
+  // At this point the simulator has executed the executable's initialization
+  // code that could have written to the SSR register.
+  // Enable UPCYCLE register.
+  HEX_4u_t thread_num;
+  CHECKED_CALL(GetCurrentHWThreadNum, &thread_num);
+  HEX_4u_t thread_ssr;
+  CHECKED_CALL(ReadThreadRegister, thread_num, TH_REG_SSR, &thread_ssr);
+  thread_ssr |= (1 << 23);
+  CHECKED_CALL(WriteThreadRegister, thread_num, TH_REG_SSR, thread_ssr);
+}
+
+void* HexagonSimulator::Alloc(unsigned size, unsigned align) {
+  LOG(INFO) << "HexagonSimulator::Alloc(size=" << size << ", align=" << align
+            << ')';
+  // Send kAlloc with {size, align}; the reply holds a MsgPointer at m.va.
+  MsgAlloc request = {size, align};
+  Message m = {kAlloc, sizeof(MsgAlloc), 0u};
+  SendMsg(m, &request, true);
+  CHECK_EQ(sizeof(MsgPointer), m.len);
+
+  MsgPointer mp;
+  CopyFromV(&mp, m.va, m.len);
+  LOG(INFO) << "HexagonSimulator::Alloc -> " << std::hex << mp.va << std::dec;
+  CHECK_NE(mp.va, 0);
+  return va2p(mp.va);
+}
+
+void HexagonSimulator::Free(void* ptr) {
+  LOG(INFO) << "HexagonSimulator::Free(ptr=" << std::hex << ptr << std::dec
+            << ')';
+  if (task_queuing_) {  // a kFlush goes out first when task queuing is on
+    Message flush = {kFlush, 0, 0};
+    SendMsg(flush, nullptr, true);
+  }
+  MsgPointer target = {p2va(ptr)};
+  Message m = {kFree, sizeof(MsgPointer), 0u};
+  SendMsg(m, &target, true);
+}
+
+void* HexagonSimulator::AllocVtcm(unsigned size, unsigned align) {
+  LOG(INFO) << "HexagonSimulator::AllocVtcm(size=" << size
+            << ", align=" << align << ')';
+  // Send kAllocVtcm with {size, align}; the reply holds a MsgPointer at m.va.
+  MsgAlloc request = {size, align};
+  Message m = {kAllocVtcm, sizeof(MsgAlloc), 0u};
+  SendMsg(m, &request, true);
+  CHECK_EQ(sizeof(MsgPointer), m.len);
+
+  MsgPointer mp;
+  CopyFromV(&mp, m.va, m.len);
+  LOG(INFO) << "HexagonSimulator::AllocVtcm -> " << std::hex << mp.va
+            << std::dec;
+  CHECK_NE(mp.va, 0);
+  return va2p(mp.va);
+}
+
+void HexagonSimulator::FreeVtcm(void* ptr) {}  // no-op; NOTE(review): is a VTCM release needed?
+
+void HexagonSimulator::CopyDeviceToDevice(void* dst, const void* src,
+                                          unsigned len) {
+  LOG(INFO) << "HexagonSimulator::CopyDeviceToDevice(dst=" << std::hex << dst
+            << ", src=" << src << ", len=" << std::dec << len << ')';
+  CHECK(dst != nullptr && src != nullptr);
+  MsgCopy request = {p2va(dst), p2va(src), len};  // sent with a kCopy message
+  Message m = {kCopy, sizeof(MsgCopy), 0u};
+  SendMsg(m, &request, true);
+}
+
+void HexagonSimulator::CopyDeviceToHost(void* host_dst, const void* src,
+                                        unsigned len) {
+  LOG(INFO) << "HexagonSimulator::CopyDeviceToHost(host_dst=" << host_dst
+            << ", src=" << src << ", len=" << len << ')';
+  if (task_queuing_) {  // a kFlush goes out before reading when queuing is on
+    Message flush = {kFlush, 0, 0};
+    SendMsg(flush, nullptr, true);
+  }
+  CopyFromV(host_dst, p2va(src), len);
+}
+
+void HexagonSimulator::CopyHostToDevice(void* dst, const void* host_src,
+                                        unsigned len) {
+  LOG(INFO) << "HexagonSimulator::CopyHostToDevice(dst=" << dst
+            << ", host_src=" << host_src << ", len=" << len << ')';
+  CopyToV(p2va(dst), host_src, len);  // direct write; no message is sent here
+}
+
+void* HexagonSimulator::Load(const std::string& data, const std::string& fmt) {
+  // Sends `data` plus its terminating NUL in a kLoad message; `fmt` is unused.
+  Message m = {kLoad, static_cast<uint32_t>(data.size() + 1), 0u};
+  SendMsg(m, data.c_str(), false);
+
+  CHECK_EQ(sizeof(MsgPointer), m.len);
+  MsgPointer mp;
+  CopyFromV(&mp, m.va, sizeof(mp));
+
+  return va2p(mp.va);  // returned as-is; the value is not checked for zero
+}
+
+void HexagonSimulator::Unload(void* mod) {
+  CHECK(mod);  // a null module handle is rejected
+  MsgPointer handle = {p2va(mod)};
+  Message m = {kUnload, sizeof(MsgPointer), 0u};
+  SendMsg(m, &handle, false);
+}
+
+void* HexagonSimulator::Resolve(const std::string& sym) {
+  LOG(INFO) << "HexagonSimulator::Resolve(sym=" << sym << ')';
+  // Sends `sym` plus its terminating NUL in a kResolve message.
+  Message m = {kResolve, static_cast<uint32_t>(sym.size() + 1), 0u};
+  SendMsg(m, sym.c_str(), true);
+  CHECK_EQ(sizeof(MsgPointer), m.len);
+
+  MsgPointer mp;
+  CopyFromV(&mp, m.va, sizeof(mp));
+  LOG(INFO) << "HexagonSimulator::Resolve -> " << std::hex << mp.va
+            << std::dec;
+  return va2p(mp.va);  // returned as-is; the value is not checked for zero
+}
+
+void HexagonSimulator::Call(void* func, uint32_t* scalar, unsigned sc_num,
+                            uint32_t* stack, unsigned st_num) {
+  LOG(INFO) << "HexagonSimulator::Call(func=" << std::hex << func
+            << ", scalar=" << scalar << ", sc_num=" << std::dec
+            << sc_num
+            // NOLINTNEXTLINE(build/include_what_you_use)
+            << ", stack=" << std::hex << stack << ", st_num=" << std::dec
+            << st_num << ')';
+
+  std::vector<uint32_t> data;  // payload: MsgCall words, scalars, stack words
+
+  // Copy the MsgCall contents into the data vector as a sequence of uints.
+  MsgCall me = {p2va(func), sc_num, st_num};
+
+  CHECK((is_multiple_of<sizeof(MsgCall), sizeof(uint32_t)>()));
+  for (unsigned i = 0, e = sizeof(me) / sizeof(uint32_t); i != e; ++i)
+    data.push_back(reinterpret_cast<uint32_t*>(&me)[i]);
+
+  // Append the scalar (register) arguments.
+  for (unsigned i = 0; i != sc_num; ++i) data.push_back(scalar[i]);
+  // Append the stack contents.
+  for (unsigned i = 0; i != st_num; ++i) data.push_back(stack[i]);
+
+  std::ostringstream log_data;
+  log_data << "data: {" << std::hex;
+  for (uint32_t word : data) {
+    log_data << ' ' << word;
+  }
+  log_data << std::dec << " }" << std::flush;
+  LOG(INFO) << log_data.str();
+
+  Message m = {kCall, static_cast<uint32_t>(data.size() * sizeof(uint32_t)),
+               0u};
+  SendMsg(m, data.data(), true);
+
+  if (!task_queuing_) {  // without task queuing, a kFlush follows every call
+    Message mf = {kFlush, 0, 0};
+    SendMsg(mf, 0, true);
+  }
+
+  std::vector<uint8_t> rv(m.len);  // raw reply bytes, used for logging only
+  CopyFromV(rv.data(), m.va, m.len);
+
+  std::ostringstream log_rv;
+  log_rv << "HexagonSimulator::Call -> {" << std::hex;
+  for (unsigned i = 0, e = std::min<unsigned>(rv.size(), 4u); i != e; ++i) {
+    log_rv << ' ' << std::setw(2) << std::setfill('0')
+           << static_cast<uint32_t>(rv[i]);
+  }
+  if (rv.size() > 4) log_rv << "...";
+  log_rv << std::dec << " }";
+  LOG(INFO) << log_rv.str();
+}
+
+bool HexagonSimulator::Configure(string_list& opts) {
+  while (!opts.empty()) {
+    const std::string key = *detail::pop_front(opts);
+    const auto f = opt_map_.find(key);
+    if (f == opt_map_.end()) {
+      LOG(FATAL) << "Unrecognized simulator option: " << key;
+      // unreachable
+    }
+    CHECK((this->*f->second)(opts)) << "error handling option: " << key;
+  }
+
+  // AHB: applied only when both bounds were given; one alone is an error.
+  if (ahb_.first.hasValue() && ahb_.second.hasValue()) {
+    CHECKED_CALL(ConfigureAHB, *ahb_.first, *ahb_.second);
+  } else {
+    CHECK(!ahb_.first.hasValue() && !ahb_.second.hasValue())
+        << "HexagonSimulator: please specify both low and high addresses "
+           "for AHB";
+  }
+
+  // AXI2: same rule as for AHB above.
+  if (axi2_.first.hasValue() && axi2_.second.hasValue()) {
+    CHECKED_CALL(ConfigureAXI2, *axi2_.first, *axi2_.second);
+  } else {
+    CHECK(!axi2_.first.hasValue() && !axi2_.second.hasValue())
+        << "HexagonSimulator: please specify both low and high addresses "
+           "for AXI2";
+  }
+
+  return true;
+}
+
+// --ahbbuspenalty <penalty> <interval>: both arguments are always consumed.
+bool HexagonSimulator::HandleAHBBusPenalty(string_list& rest) {
+  const auto penalty = detail::to_uint(detail::pop_front(rest));
+  const auto interval = to_interval(detail::pop_front(rest));
+  if (!penalty || !interval) return false;
+  CHECKED_CALL(ConfigureAHBBusPenalty, *penalty, *interval);
+  return true;
+}
+
+// --ahbbusratio <ratio>: passes the parsed float to ConfigureAHBBusRatio.
+bool HexagonSimulator::HandleAHBBusRatio(string_list& rest) {
+  const auto ratio = detail::to_float(detail::pop_front(rest));
+  if (!ratio) return false;
+  CHECKED_CALL(ConfigureAHBBusRatio, *ratio);
+  return true;
+}
+
+// --ahb:highaddr <addr>: stores the high AHB address in ahb_.second; it is
+// applied later by Configure() together with the low address.
+bool HexagonSimulator::HandleAHBHighAddr(string_list& rest) {
+  const auto addr = detail::to_uint(detail::pop_front(rest));
+  CHECK(addr) << "HexagonSimulator: invalid value for AHB high address";
+  ahb_.second = *addr;  // only reached when the CHECK above passed
+  return true;
+}
+
+// --ahb:lowaddr <addr>: stores the low AHB address in ahb_.first; it is
+// applied later by Configure() together with the high address.
+bool HexagonSimulator::HandleAHBLowAddr(string_list& rest) {
+  const auto addr = detail::to_uint(detail::pop_front(rest));
+  CHECK(addr) << "HexagonSimulator: invalid value for AHB low address";
+  ahb_.first = *addr;  // only reached when the CHECK above passed
+  return true;
+}
+
+// --axi2buspenalty <penalty> <interval>: both arguments are always consumed.
+bool HexagonSimulator::HandleAXI2BusPenalty(string_list& rest) {
+  const auto penalty = detail::to_uint(detail::pop_front(rest));
+  const auto interval = to_interval(detail::pop_front(rest));
+  if (!penalty || !interval) return false;
+  CHECKED_CALL(ConfigureAXI2BusPenalty, *penalty, *interval);
+  return true;
+}
+
+// --axi2busratio <ratio>: passes the parsed float to ConfigureAXI2BusRatio.
+bool HexagonSimulator::HandleAXI2BusRatio(string_list& rest) {
+  const auto ratio = detail::to_float(detail::pop_front(rest));
+  if (!ratio) return false;
+  CHECKED_CALL(ConfigureAXI2BusRatio, *ratio);
+  return true;
+}
+
+// --axi2:highaddr <addr>: stores the high AXI2 address in axi2_.second; it is
+// applied later by Configure() together with the low address.
+bool HexagonSimulator::HandleAXI2HighAddr(string_list& rest) {
+  const auto addr = detail::to_uint(detail::pop_front(rest));
+  CHECK(addr) << "HexagonSimulator: invalid value for AXI2 high address";
+  axi2_.second = *addr;  // only reached when the CHECK above passed
+  return true;
+}
+
+// --axi2:lowaddr <addr>: stores the low AXI2 address in axi2_.first; it is
+// applied later by Configure() together with the high address.
+bool HexagonSimulator::HandleAXI2LowAddr(string_list& rest) {
+  const auto addr = detail::to_uint(detail::pop_front(rest));
+  CHECK(addr) << "HexagonSimulator: invalid value for AXI2 low address";
+  axi2_.first = *addr;  // only reached when the CHECK above passed
+  return true;
+}
+
+bool HexagonSimulator::HandleBuildTag(string_list& /*rest*/) {  // no argument
+  sim_->PrintBuildTag();  // return value, if any, is not checked
+  return true;
+}
+
+// --buspenalty <penalty> <interval>: both arguments are always consumed.
+bool HexagonSimulator::HandleBusPenalty(string_list& rest) {
+  const auto penalty = detail::to_uint(detail::pop_front(rest));
+  const auto interval = to_interval(detail::pop_front(rest));
+  if (!penalty || !interval) return false;
+  CHECKED_CALL(ConfigureBusPenalty, *penalty, *interval);
+  return true;
+}
+
+// --busratio <ratio>: passes the parsed float to ConfigureBusRatio.
+bool HexagonSimulator::HandleBusRatio(string_list& rest) {
+  const auto ratio = detail::to_float(detail::pop_front(rest));
+  if (!ratio) return false;
+  CHECKED_CALL(ConfigureBusRatio, *ratio);
+  return true;
+}
+
+// -b / --bustrace <file>: calls SetTracing(HEX_TRACE_BUS, <file>).
+bool HexagonSimulator::HandleBusTrace(string_list& rest) {
+  const auto file = detail::pop_front(rest);
+  if (!file) return false;  // the file name is missing
+  CHECKED_CALL(SetTracing, HEX_TRACE_BUS, file->c_str());
+  return true;
+}
+
+bool HexagonSimulator::HandleBypassIdle(string_list& /*rest*/) {  // no argument
+  CHECKED_CALL(ConfigureBypassIdle, true);
+  return true;
+}
+
+// --connection_timeout <n>: passes the integer to ConfigureConnectionTimeout.
+bool HexagonSimulator::HandleConnectionTimeout(string_list& rest) {
+  const auto time = detail::to_int(detail::pop_front(rest));
+  if (!time) return false;
+  CHECKED_CALL(ConfigureConnectionTimeout, *time);
+  return true;
+}
+
+// --coproctrace <file>: calls SetTracing(HEX_TRACE_COPROC, <file>).
+bool HexagonSimulator::HandleCoprocTrace(string_list& rest) {
+  const auto file = detail::pop_front(rest);
+  if (!file) return false;  // the file name is missing
+  CHECKED_CALL(SetTracing, HEX_TRACE_COPROC, file->c_str());
+  return true;
+}
+
+// --coredump <file>: passes the file name to ConfigureCoreDump.
+bool HexagonSimulator::HandleCoreDump(string_list& rest) {
+  const auto file = detail::pop_front(rest);
+  if (!file) return false;  // the file name is missing
+  CHECKED_CALL(ConfigureCoreDump, file->c_str());
+  return true;
+}
+
+// --cosim_file <file>: passes the file name to ConfigureCosim.
+bool HexagonSimulator::HandleCosimFile(string_list& rest) {
+  const auto file = detail::pop_front(rest);
+  if (!file) return false;  // the file name is missing
+  CHECKED_CALL(ConfigureCosim, file->c_str());
+  return true;
+}
+
+// --dcachetrace <file>: calls SetTracing(HEX_TRACE_DCACHE, <file>).
+bool HexagonSimulator::HandleDCacheTrace(string_list& rest) {
+  const auto file = detail::pop_front(rest);
+  if (!file) return false;  // the file name is missing
+  CHECKED_CALL(SetTracing, HEX_TRACE_DCACHE, file->c_str());
+  return true;
+}
+
+// --dsp_clock <freq>: passes the unsigned value to ConfigureCoreFrequency.
+bool HexagonSimulator::HandleDSPClock(string_list& rest) {
+  const auto freq = detail::to_uint(detail::pop_front(rest));
+  if (!freq) return false;
+  CHECKED_CALL(ConfigureCoreFrequency, *freq);
+  return true;
+}
+
+// --etm_base / --etmcfg_base <addr>: passes the value to ConfigureEtmcfgBase.
+bool HexagonSimulator::HandleETMCFGBase(string_list& rest) {
+  const auto base = detail::to_uint(detail::pop_front(rest));
+  if (!base) return false;
+  CHECKED_CALL(ConfigureEtmcfgBase, *base);
+  return true;
+}
+
+// --gdbserv / -G <port>: ConfigureRemoteDebug(<port>), then keep debug_port_.
+bool HexagonSimulator::HandleGDBServ(string_list& rest) {
+  const auto port = detail::to_uint(detail::pop_front(rest));
+  if (!port) return false;
+  CHECKED_CALL(ConfigureRemoteDebug, *port);
+  debug_port_ = *port;  // read back by HandleReconnect
+  return true;
+}
+
+// --hvx_length <n>: passes the integer to ConfigureHVXLength.
+bool HexagonSimulator::HandleHVXLength(string_list& rest) {
+  const auto len = detail::to_int(detail::pop_front(rest));
+  if (!len) return false;
+  CHECKED_CALL(ConfigureHVXLength, *len);
+  return true;
+}
+
+// --icachetrace <file>: calls SetTracing(HEX_TRACE_ICACHE, <file>).
+bool HexagonSimulator::HandleICacheTrace(string_list& rest) {
+  const auto file = detail::pop_front(rest);
+  if (!file) return false;  // the file name is missing
+  CHECKED_CALL(SetTracing, HEX_TRACE_ICACHE, file->c_str());
+  return true;
+}
+
+// --l2cachetrace <file>: calls SetTracing(HEX_TRACE_L2CACHE, <file>).
+bool HexagonSimulator::HandleL2CacheTrace(string_list& rest) {
+  const auto file = detail::pop_front(rest);
+  if (!file) return false;  // the file name is missing
+  CHECKED_CALL(SetTracing, HEX_TRACE_L2CACHE, file->c_str());
+  return true;
+}
+
+// --l2cfg_base <addr>: passes the unsigned value to ConfigureL2cfgBase.
+bool HexagonSimulator::HandleL2CFGBase(string_list& rest) {
+  const auto base = detail::to_uint(detail::pop_front(rest));
+  if (!base) return false;
+  CHECKED_CALL(ConfigureL2cfgBase, *base);
+  return true;
+}
+
+// --l2tcm_base <addr>: passes the unsigned value to ConfigureL2tcmBase.
+bool HexagonSimulator::HandleL2TCMBase(string_list& rest) {
+  const auto base = detail::to_uint(detail::pop_front(rest));
+  if (!base) return false;
+  CHECKED_CALL(ConfigureL2tcmBase, *base);
+  return true;
+}
+
+// --memfill_rand <seed>: passes the unsigned value to ConfigureMemFillRandom.
+bool HexagonSimulator::HandleMemFillRand(string_list& rest) {
+  const auto seed = detail::to_uint(detail::pop_front(rest));
+  if (!seed) return false;
+  CHECKED_CALL(ConfigureMemFillRandom, *seed);
+  return true;
+}
+
+// --memfill <value>: passes the unsigned value to ConfigureMemFill.
+bool HexagonSimulator::HandleMemFill(string_list& rest) {
+  const auto val = detail::to_uint(detail::pop_front(rest));
+  if (!val) return false;
+  CHECKED_CALL(ConfigureMemFill, *val);
+  return true;
+}
+
+// --memtrace / -m <file>: calls SetTracing(HEX_TRACE_MEM, <file>).
+bool HexagonSimulator::HandleMemTrace(string_list& rest) {
+  const auto file = detail::pop_front(rest);
+  if (!file) return false;  // the file name is missing
+  CHECKED_CALL(SetTracing, HEX_TRACE_MEM, file->c_str());
+  return true;
+}
+
+// --nullptr <mode>: to_nullptr() result goes to ConfigureNULLPointerBehavior.
+bool HexagonSimulator::HandleNullPtr(string_list& rest) {
+  const auto behavior = to_nullptr(detail::pop_front(rest));
+  if (!behavior) return false;
+  CHECKED_CALL(ConfigureNULLPointerBehavior, *behavior);
+  return true;
+}
+
+// --packet_analyze <file>: passes the file name to ConfigurePacketAnalysis.
+bool HexagonSimulator::HandlePacketAnalyze(string_list& rest) {
+  const auto file = detail::pop_front(rest);
+  if (!file) return false;  // the file name is missing
+  CHECKED_CALL(ConfigurePacketAnalysis, file->c_str());
+  return true;
+}
+
+// --pcfilter <begin>-<end>: passes both bounds to ConfigurePCRangeFilter.
+bool HexagonSimulator::HandlePCFilter(string_list& rest) {
+  const auto range =
+      detail::to_range<uint64_t, detail::to_uint>(detail::pop_front(rest));
+  if (!range) return false;
+  CHECKED_CALL(ConfigurePCRangeFilter, range->first, range->second);
+  return true;
+}
+
+// --pctrace_min / -u <file>: calls SetTracing(HEX_TRACE_PC_MIN, <file>).
+bool HexagonSimulator::HandlePCTraceMin(string_list& rest) {
+  const auto file = detail::pop_front(rest);
+  if (!file) return false;  // the file name is missing
+  CHECKED_CALL(SetTracing, HEX_TRACE_PC_MIN, file->c_str());
+  return true;
+}
+
+// --pctrace_nano <file>: calls SetTracing(HEX_TRACE_PC_NANO, <file>).
+bool HexagonSimulator::HandlePCTraceNano(string_list& rest) {
+  const auto file = detail::pop_front(rest);
+  if (!file) return false;  // the file name is missing
+  CHECKED_CALL(SetTracing, HEX_TRACE_PC_NANO, file->c_str());
+  return true;
+}
+
+// --pctrace / -t <file>: calls SetTracing(HEX_TRACE_PC, <file>).
+bool HexagonSimulator::HandlePCTrace(string_list& rest) {
+  const auto file = detail::pop_front(rest);
+  if (!file) return false;  // the file name is missing
+  CHECKED_CALL(SetTracing, HEX_TRACE_PC, file->c_str());
+  return true;
+}
+
+// --pmu_statsfile <file>: passes the name to ConfigurePmuStatisticsFile.
+bool HexagonSimulator::HandlePMUStatsFile(string_list& rest) {
+  const auto file = detail::pop_front(rest);
+  if (!file) return false;  // the file name is missing
+  CHECKED_CALL(ConfigurePmuStatisticsFile, file->c_str());
+  return true;
+}
+
+// -p / --profile <path>: passes the path to ConfigureGProf.
+bool HexagonSimulator::HandleProfile(string_list& rest) {
+  const auto path = detail::pop_front(rest);
+  if (!path) return false;  // the path argument is missing
+  CHECKED_CALL(ConfigureGProf, path->c_str());
+  return true;
+}
+
+bool HexagonSimulator::HandleProfileTimeZero(string_list& rest) {
+  auto timezero = detail::to_bool(detail::pop_front(rest));  // Boolean arg.
+  if (timezero) {
+    CHECKED_CALL(ConfigureProfileMode, *timezero);
+  }
+  return static_cast<bool>(timezero);  // False if missing or not parsed.
+}
+
+bool HexagonSimulator::HandleQuiet(string_list& rest) {  // `rest` is unused.
+  sim_->VerboseMode(HEX_QUIET);  // Same call as HandleVerbose, fixed mode.
+  return true;
+}
+
+bool HexagonSimulator::HandleReconnect(string_list& rest) {  // `rest` unused.
+  if (!debug_port_) {  // Per the message below, --gdbserv must come first.
+    LOG(FATAL) << "Reconnect error: --reconnect must be specified "
+                  "AFTER --gdbserv ";
+  }
+  CHECKED_CALL(ConfigureRemoteDebug, *debug_port_, true);
+  return true;
+}
+
+bool HexagonSimulator::HandleRTOS(string_list& rest) {
+  auto file = detail::pop_front(rest);  // File for ConfigureOSAwareness.
+  if (file) {
+    CHECKED_CALL(ConfigureOSAwareness, file->c_str());
+  }
+  return static_cast<bool>(file);  // False if no argument was obtained.
+}
+
+bool HexagonSimulator::HandleSimErr(string_list& rest) {
+  auto file = detail::pop_front(rest);  // File for ConfigureSimStderr.
+  if (file) {
+    CHECKED_CALL(ConfigureSimStderr, file->c_str());
+  }
+  return static_cast<bool>(file);  // False if no argument was obtained.
+}
+
+bool HexagonSimulator::HandleSimIn(string_list& rest) {
+  auto file = detail::pop_front(rest);  // File for ConfigureSimStdin.
+  if (file) {
+    CHECKED_CALL(ConfigureSimStdin, file->c_str());
+  }
+  return static_cast<bool>(file);  // False if no argument was obtained.
+}
+
+bool HexagonSimulator::HandleSimOut(string_list& rest) {
+  auto file = detail::pop_front(rest);  // File for ConfigureSimStdout.
+  if (file) {
+    CHECKED_CALL(ConfigureSimStdout, file->c_str());
+  }
+  return static_cast<bool>(file);  // False if no argument was obtained.
+}
+
+bool HexagonSimulator::HandleStackStart(string_list& rest) {
+  auto base = detail::to_uint(detail::pop_front(rest));  // First argument.
+  auto size = detail::to_uint(detail::pop_front(rest));  // Second argument.
+  if (base && size) {
+    CHECKED_CALL(ConfigureStackInfo, *base, *size);
+  }
+  return static_cast<bool>(base) && static_cast<bool>(size);
+}
+
+bool HexagonSimulator::HandleStallTrace(string_list& rest) {
+  auto file = detail::pop_front(rest);  // File for HEX_TRACE_STALL.
+  if (file) {
+    CHECKED_CALL(SetTracing, HEX_TRACE_STALL, file->c_str());
+  }
+  return static_cast<bool>(file);  // False if no argument was obtained.
+}
+
+bool HexagonSimulator::HandleStatsFile(string_list& rest) {
+  auto file = detail::pop_front(rest);  // File for ConfigureStatisticsFile.
+  if (file) {
+    CHECKED_CALL(ConfigureStatisticsFile, file->c_str());
+  }
+  return static_cast<bool>(file);  // False if no argument was obtained.
+}
+
+bool HexagonSimulator::HandleSubsystemBase(string_list& rest) {
+  auto base = detail::to_uint(detail::pop_front(rest));  // Unsigned value.
+  if (base) {
+    CHECKED_CALL(ConfigureSubsystemBase, *base);
+  }
+  return static_cast<bool>(base);  // False if missing or not parsed.
+}
+
+bool HexagonSimulator::HandleSymFile(string_list& rest) {
+  auto file = detail::pop_front(rest);  // File for AddSymbolFile.
+  if (file) {
+    CHECKED_CALL(AddSymbolFile, file->c_str());
+  }
+  return static_cast<bool>(file);  // False if no argument was obtained.
+}
+
+bool HexagonSimulator::HandleTCM(string_list& rest) {  // `rest` is unused.
+  CHECKED_CALL(ConfigureTimingMode, HEX_TIMING);  // HandleTiming's default.
+  return true;
+}
+
+bool HexagonSimulator::HandleTCMHighAddr(string_list& rest) {
+  // This option takes an argument, but (the option) is ignored.
+  auto addr = detail::to_uint(detail::pop_front(rest));
+  return static_cast<bool>(addr);  // The parsed value itself is discarded.
+}
+
+bool HexagonSimulator::HandleTCMLowAddr(string_list& rest) {
+  auto addr = detail::to_uint(detail::pop_front(rest));  // Unsigned value.
+  if (addr) {
+    CHECKED_CALL(ConfigureTCM, *addr);
+  }
+  return static_cast<bool>(addr);  // False if missing or not parsed.
+}
+
+bool HexagonSimulator::HandleTimeFilterNS(string_list& rest) {
+  auto range =  // NOTE(review): template arguments reconstructed; confirm.
+      detail::to_range<uint64_t, detail::to_uint>(detail::pop_front(rest));
+  if (range) {
+    CHECKED_CALL(ConfigureTimeRangeFilter, range->first, HEX_NANOSEC,
+                 range->second, HEX_NANOSEC);
+  }
+  return static_cast<bool>(range);  // False if no range was parsed.
+}
+
+bool HexagonSimulator::HandleTiming(string_list& rest) {
+  // An explicit mode after --timing is optional; HEX_TIMING is the default.
+  HEXAPI_TimingMode selected = HEX_TIMING;
+  if (should_parse_next(rest)) {
+    auto parsed = to_timingmode(detail::pop_front(rest));
+    if (!parsed) {
+      return false;
+    }
+    selected = *parsed;
+  }
+  CHECKED_CALL(ConfigureTimingMode, selected);
+  return true;
+}
+
+bool HexagonSimulator::HandleUArchTrace(string_list& rest) {
+  auto file = detail::pop_front(rest);  // File for HEX_TRACE_UARCH.
+  if (file) {
+    CHECKED_CALL(SetTracing, HEX_TRACE_UARCH, file->c_str());
+  }
+  return static_cast<bool>(file);  // False if no argument was obtained.
+}
+
+bool HexagonSimulator::HandleUseFS(string_list& rest) {
+  auto file = detail::pop_front(rest);  // Argument for ConfigureARFilesystem.
+  if (file) {
+    CHECKED_CALL(ConfigureARFilesystem, detail::non_const_str(*file));
+  }
+  return static_cast<bool>(file);  // False if no argument was obtained.
+}
+
+bool HexagonSimulator::HandleV2PTranslation(string_list& rest) {
+  auto enable = detail::to_bool(detail::pop_front(rest));  // Boolean arg.
+  if (enable) {
+    CHECKED_CALL(EnableVirtualToPhysicalTranslation, *enable);
+  }
+  return static_cast<bool>(enable);  // False if missing or not parsed.
+}
+
+bool HexagonSimulator::HandleVerbose(string_list& rest) {
+  auto mode = to_verbosemode(detail::pop_front(rest));  // Name or number.
+  if (mode) {
+    sim_->VerboseMode(*mode);
+  }
+  return static_cast<bool>(mode);  // False if missing or unrecognized.
+}
+
+bool HexagonSimulator::should_parse_next(const string_list& rest) {
+  // True when a next token exists and it is empty or does not start with '-'.
+  auto next = detail::front(rest);
+  if (!next) return false;
+  return next->empty() || next->front() != '-';
+}
+
+llvm::Optional<HEXAPI_Interval> HexagonSimulator::to_interval(
+    const detail::MaybeString& str) {  // NOTE(review): enum name assumed.
+  auto none = llvm::Optional<HEXAPI_Interval>();
+  if (!str) return none;
+
+  if (auto val = detail::to_int(*str)) {  // Numeric spelling.
+    switch (*val) {
+      case HEX_MILLISEC:
+      case HEX_MICROSEC:
+      case HEX_NANOSEC:
+      case HEX_PICOSEC:
+      case HEX_PCYCLE:
+        return static_cast<HEXAPI_Interval>(*val);
+    }
+  }
+
+  return llvm::StringSwitch<llvm::Optional<HEXAPI_Interval>>(*str)
+      .Case("MILLISEC", HEX_MILLISEC)
+      .Case("MICROSEC", HEX_MICROSEC)
+      .Case("NANOSEC", HEX_NANOSEC)
+      .Case("PICOSEC", HEX_PICOSEC)
+      .Case("PCYCLE", HEX_PCYCLE)
+      .Default(none);  // Unrecognized name.
+}
+
+llvm::Optional<HEXAPI_TimingMode> HexagonSimulator::to_timingmode(
+    const detail::MaybeString& str) {  // Accepts a number or a name.
+  auto none = llvm::Optional<HEXAPI_TimingMode>();
+  if (!str) return none;
+
+  if (auto val = detail::to_int(*str)) {  // Numeric spelling.
+    switch (*val) {
+      case HEX_NOTIMING:
+      case HEX_TIMING_NODBC:
+      case HEX_TIMING:
+      case HEX_TIMING_COHERENCY:
+        return static_cast<HEXAPI_TimingMode>(*val);
+    }
+  }
+
+  return llvm::StringSwitch<llvm::Optional<HEXAPI_TimingMode>>(*str)
+      .Case("NOTIMING", HEX_NOTIMING)
+      .Case("TIMING_NODBC", HEX_TIMING_NODBC)
+      .Case("TIMING", HEX_TIMING)
+      .Case("TIMING_COHERENCY", HEX_TIMING_COHERENCY)
+      .Default(none);  // Unrecognized name.
+}
+
+llvm::Optional<HEXAPI_VerboseMode> HexagonSimulator::to_verbosemode(
+    const detail::MaybeString& str) {  // NOTE(review): enum name assumed.
+  auto none = llvm::Optional<HEXAPI_VerboseMode>();
+  if (!str) return none;
+
+  if (auto val = detail::to_int(*str)) {  // Numeric spelling.
+    switch (*val) {
+      case HEX_SILENT:
+      case HEX_QUIET:
+      case HEX_NORMAL:
+      case HEX_VERBOSE:
+      case HEX_REALLY_VERBOSE:
+        return static_cast<HEXAPI_VerboseMode>(*val);
+    }
+  }
+
+  return llvm::StringSwitch<llvm::Optional<HEXAPI_VerboseMode>>(*str)
+      .Case("SILENT", HEX_SILENT)
+      .Case("QUIET", HEX_QUIET)
+      .Case("NORMAL", HEX_NORMAL)
+      .Case("VERBOSE", HEX_VERBOSE)
+      .Case("REALLY_VERBOSE", HEX_REALLY_VERBOSE)
+      .Default(none);  // Unrecognized name.
+}
+
+llvm::Optional<HEXAPI_Nullptr> HexagonSimulator::to_nullptr(
+    const detail::MaybeString& str) {  // NOTE(review): enum name assumed.
+  auto none = llvm::Optional<HEXAPI_Nullptr>();
+  if (!str) return none;
+
+  if (auto val = detail::to_int(*str)) {  // Numeric spelling.
+    switch (*val) {
+      case HEX_NULLPTR_IGNORE:
+      case HEX_NULLPTR_WARN:
+      case HEX_NULLPTR_FATAL:
+      case HEX_NULLPTR_PCZERO:
+        return static_cast<HEXAPI_Nullptr>(*val);
+    }
+  }
+
+  return llvm::StringSwitch<llvm::Optional<HEXAPI_Nullptr>>(*str)
+      .Case("IGNORE", HEX_NULLPTR_IGNORE)
+      .Case("WARN", HEX_NULLPTR_WARN)
+      .Case("FATAL", HEX_NULLPTR_FATAL)
+      .Case("PCZERO", HEX_NULLPTR_PCZERO)
+      .Default(none);  // Unrecognized name.
+}
+
+// Maps a HEXAPI_Status value to its name without the HEX_STAT_ prefix;
+// values not listed here map to "unknown".
+std::string HexagonSimulator::to_string(HEXAPI_Status status) {
+  const char* name = "unknown";
+  switch (status) {
+    case HEX_STAT_ERROR: name = "ERROR"; break;
+    case HEX_STAT_SUCCESS: name = "SUCCESS"; break;
+    case HEX_STAT_CANNOT_CONFIG: name = "CANNOT_CONFIG"; break;
+    case HEX_STAT_INVALID_ARGS: name = "INVALID_ARGS"; break;
+    case HEX_STAT_RANGE_ERROR: name = "RANGE_ERROR"; break;
+    case HEX_STAT_FILE_ACCESS_ERROR: name = "FILE_ACCESS_ERROR"; break;
+    case HEX_STAT_DEVICE_NOT_FOUND: name = "DEVICE_NOT_FOUND"; break;
+    case HEX_STAT_MEM_ACCESS_ERROR: name = "MEM_ACCESS_ERROR"; break;
+    case HEX_STAT_CANNOT_TRANSLATE: name = "CANNOT_TRANSLATE"; break;
+    case HEX_STAT_NO_ACTIVE_THREADS: name = "NO_ACTIVE_THREADS"; break;
+    case HEX_STAT_LOAD_ELF_ERROR: name = "LOAD_ELF_ERROR"; break;
+    case HEX_STAT_CORE_RESET: name = "CORE_RESET"; break;
+    default: break;
+  }
+  return name;
+}
+
+}  // namespace hexagon
+}  // namespace runtime
+}  // namespace tvm
diff --git a/src/runtime/hexagon/sim/hexagon_sim_proto.h b/src/runtime/hexagon/sim/hexagon_sim_proto.h
new file mode 100644
index 000000000000..2a41536037df
--- /dev/null
+++ b/src/runtime/hexagon/sim/hexagon_sim_proto.h
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TVM_RUNTIME_HEXAGON_SIM_HEXAGON_SIM_PROTO_H_
+#define TVM_RUNTIME_HEXAGON_SIM_HEXAGON_SIM_PROTO_H_
+
+// Protocol:
+
+// Host >-- [ code:MsgReq,  len:amount requested, va:_       ] --> Remote
+// Host <-- [ code:MsgAck,  len:amount provided,  va:address ] --< Remote
+// Host >-- [ code:message, len:payload length,   va:address ] --> Remote
+// Host <-- [ code:None,    len:response length,  va:address ] --< Remote
+
+enum : uint32_t {  // Values carried in Message::code.
+  kNone,    // "None" in the protocol diagram: code of the final response.
+  kMsgReq,  // Host -> Remote request; len is the amount requested.
+  kMsgAck,  // Remote -> Host reply; len is the amount provided, va the address.
+  kAlloc,   // NOTE(review): kAlloc..kAllocVtcm presumably select the request.
+  kFree,
+  kCopy,
+  kLoad,
+  kUnload,
+  kResolve,
+  kCall,
+  kFlush,
+  kAllocVtcm
+};
+
+struct Message {  // Header used by every step of the exchange shown above.
+  uint32_t code;  // One of the k* codes.
+  uint32_t len;   // Amount requested/provided, or payload/response length.
+  uint32_t va;    // Address; shown as "_" (unused) in the initial request.
+} __attribute__((packed));
+
+struct MsgAlloc {  // NOTE(review): presumably the payload of kAlloc* requests.
+  uint32_t size;
+  uint32_t align;
+} __attribute__((packed));
+
+struct MsgPointer {  // Payload holding a single 32-bit `va` value.
+  uint32_t va;
+} __attribute__((packed));
+
+struct MsgCopy {  // dst/src/len triple; NOTE(review): presumably for kCopy.
+  uint32_t dst;
+  uint32_t src;
+  uint32_t len;
+} __attribute__((packed));
+
+struct MsgCall {  // Fixed fields followed by a variable number of data words.
+  uint32_t func_va;     // offset:  0
+  uint32_t scalar_num;  //          4
+  uint32_t stack_num;   //          8
+  uint32_t data[];      //         12
+} __attribute__((packed));
+
+#endif  // TVM_RUNTIME_HEXAGON_SIM_HEXAGON_SIM_PROTO_H_
diff --git a/src/runtime/hexagon/target/fastrpc/tvm_hexagon_remote.h b/src/runtime/hexagon/target/fastrpc/tvm_hexagon_remote.h
new file mode 100644
index 000000000000..bc8766c63db2
--- /dev/null
+++ b/src/runtime/hexagon/target/fastrpc/tvm_hexagon_remote.h
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TVM_RUNTIME_HEXAGON_TARGET_FASTRPC_TVM_HEXAGON_REMOTE_H_
+#define TVM_RUNTIME_HEXAGON_TARGET_FASTRPC_TVM_HEXAGON_REMOTE_H_
+/// @file tvm_hexagon_remote.idl
+/// IDL to offload TVM kernels to Hexagon from APPS for multi-domains
+#include "AEEStdDef.h"
+#include "remote.h"
+#ifndef __QAIC_HEADER
+#define __QAIC_HEADER(ff) ff
+#endif  // __QAIC_HEADER
+
+#ifndef __QAIC_HEADER_EXPORT
+#define __QAIC_HEADER_EXPORT
+#endif  // __QAIC_HEADER_EXPORT
+
+#ifndef __QAIC_HEADER_ATTRIBUTE
+#define __QAIC_HEADER_ATTRIBUTE
+#endif  // __QAIC_HEADER_ATTRIBUTE
+
+#ifndef __QAIC_IMPL
+#define __QAIC_IMPL(ff) ff
+#endif  // __QAIC_IMPL
+
+#ifndef __QAIC_IMPL_EXPORT
+#define __QAIC_IMPL_EXPORT
+#endif  // __QAIC_IMPL_EXPORT
+
+#ifndef __QAIC_IMPL_ATTRIBUTE
+#define __QAIC_IMPL_ATTRIBUTE
+#endif  // __QAIC_IMPL_ATTRIBUTE
+#ifdef __cplusplus
+extern "C" {
+#endif
+/**
+ * Opens the handle in the specified domain.  If this is the first
+ * handle, this creates the session.  Typically this means opening
+ * the device, aka open("/dev/adsprpc-smd"), then calling ioctl
+ * device APIs to create a PD on the DSP to execute our code in,
+ * then asking that PD to dlopen the .so and dlsym the skel function.
+ *
+ * @param uri, _URI"&_dom=aDSP"
+ *    _URI is a QAIC generated uri, or
+ *    "file:///?_skel_handle_invoke&_modver=1.0"
+ *    If the _dom parameter is not present, _dom=DEFAULT is assumed
+ *    but not forwarded.
+ *    Reserved uri keys:
+ *      [0]: first unnamed argument is the skel invoke function
+ *      _dom: execution domain name, _dom=mDSP/aDSP/DEFAULT
+ *      _modver: module version, _modver=1.0
+ *      _*: any other key name starting with an _ is reserved
+ *    Unknown uri keys/values are forwarded as is.
+ * @param h, resulting handle
+ * @retval, 0 on success
+ */
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_open)(
+    const char* uri, remote_handle64* h) __QAIC_HEADER_ATTRIBUTE;
+/**
+    * Closes a handle.  If this is the last handle to close, the session
+    * is closed as well, releasing all the allocated resources.
+
+    * @param h, the handle to close
+    * @retval, 0 on success, should always succeed
+    */
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_close)(
+    remote_handle64 h) __QAIC_HEADER_ATTRIBUTE;
+typedef struct _tvm_hexagon_remote_buffer__seq_octet
+    _tvm_hexagon_remote_buffer__seq_octet;
+typedef _tvm_hexagon_remote_buffer__seq_octet tvm_hexagon_remote_buffer;
+struct _tvm_hexagon_remote_buffer__seq_octet {  // Pointer + length pair.
+  unsigned char* data;
+  int dataLen;  // NOTE(review): presumably the length of `data`; confirm unit.
+};
+typedef unsigned int tvm_hexagon_remote_handle_t;  // Module/symbol handle.
+typedef uint64 tvm_hexagon_remote_scalar_t;
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_load_library)(
+    remote_handle64 _h, const char* soname, int sonameLen, const char* code,
+    int codeLen,  // soname and code are each passed with an explicit length.
+    tvm_hexagon_remote_handle_t* module_ptr) __QAIC_HEADER_ATTRIBUTE;
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_get_symbol)(
+    remote_handle64 _h, tvm_hexagon_remote_handle_t module_ptr,
+    const char* name, int nameLen,  // Symbol name with explicit length.
+    tvm_hexagon_remote_handle_t* sym_ptr) __QAIC_HEADER_ATTRIBUTE;
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_kernel)(
+    remote_handle64 _h, tvm_hexagon_remote_handle_t module_ptr,
+    tvm_hexagon_remote_handle_t symbol, int* scalar, int scalarLen, int* stack,
+    int stackLen, const tvm_hexagon_remote_buffer* scalar_in_octet,
+    int scalar_in_octetLen, tvm_hexagon_remote_buffer* scalar_out_octet,
+    int scalar_out_octetLen, const tvm_hexagon_remote_buffer* stack_in_octet,
+    int stack_in_octetLen, tvm_hexagon_remote_buffer* stack_out_octet,
+    int stack_out_octetLen, uint64* pcycles,
+    uint64* time_usec) __QAIC_HEADER_ATTRIBUTE;
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_release_library)(
+    remote_handle64 _h,  // Called from HexagonTarget::ReleaseLibrary.
+    tvm_hexagon_remote_handle_t module_ptr) __QAIC_HEADER_ATTRIBUTE;
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_alloc_vtcm)(
+    remote_handle64 _h, unsigned int size, unsigned int align,
+    unsigned int* dsp_va) __QAIC_HEADER_ATTRIBUTE;  // dsp_va receives result.
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_free_vtcm)(
+    remote_handle64 _h, unsigned int dsp_va) __QAIC_HEADER_ATTRIBUTE;
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_call_mmap64)(
+    remote_handle64 _h) __QAIC_HEADER_ATTRIBUTE;  // See HexagonTarget::Alloc.
+#ifndef tvm_hexagon_remote_URI
+#define tvm_hexagon_remote_URI                                            \
+  "file:///"                                                              \
+  "libtvm_hexagon_remote_skel.so?tvm_hexagon_remote_skel_handle_invoke&_" \
+  "modver=1.0"
+#endif /*tvm_hexagon_remote_URI*/
+#ifdef __cplusplus
+}
+#endif
+#endif  // TVM_RUNTIME_HEXAGON_TARGET_FASTRPC_TVM_HEXAGON_REMOTE_H_
diff --git a/src/runtime/hexagon/target/fastrpc/tvm_hexagon_remote_nd.h b/src/runtime/hexagon/target/fastrpc/tvm_hexagon_remote_nd.h
new file mode 100644
index 000000000000..bb35bd30f679
--- /dev/null
+++ b/src/runtime/hexagon/target/fastrpc/tvm_hexagon_remote_nd.h
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TVM_RUNTIME_HEXAGON_TARGET_FASTRPC_TVM_HEXAGON_REMOTE_ND_H_
+#define TVM_RUNTIME_HEXAGON_TARGET_FASTRPC_TVM_HEXAGON_REMOTE_ND_H_
+/// @file tvm_hexagon_remote_nd.idl
+/// IDL to offload TVM kernels to Hexagon from APPS for non-domains
+#include "AEEStdDef.h"
+#include "remote.h"
+#ifndef __QAIC_HEADER
+#define __QAIC_HEADER(ff) ff
+#endif  // __QAIC_HEADER
+
+#ifndef __QAIC_HEADER_EXPORT
+#define __QAIC_HEADER_EXPORT
+#endif  // __QAIC_HEADER_EXPORT
+
+#ifndef __QAIC_HEADER_ATTRIBUTE
+#define __QAIC_HEADER_ATTRIBUTE
+#endif  // __QAIC_HEADER_ATTRIBUTE
+
+#ifndef __QAIC_IMPL
+#define __QAIC_IMPL(ff) ff
+#endif  // __QAIC_IMPL
+
+#ifndef __QAIC_IMPL_EXPORT
+#define __QAIC_IMPL_EXPORT
+#endif  // __QAIC_IMPL_EXPORT
+
+#ifndef __QAIC_IMPL_ATTRIBUTE
+#define __QAIC_IMPL_ATTRIBUTE
+#endif  // __QAIC_IMPL_ATTRIBUTE
+#ifdef __cplusplus
+extern "C" {
+#endif
+typedef struct _tvm_hexagon_remote_nd_buffer__seq_octet
+    _tvm_hexagon_remote_nd_buffer__seq_octet;
+typedef _tvm_hexagon_remote_nd_buffer__seq_octet tvm_hexagon_remote_nd_buffer;
+struct _tvm_hexagon_remote_nd_buffer__seq_octet {  // Pointer + length pair.
+  unsigned char* data;
+  int dataLen;  // NOTE(review): presumably the length of `data`; confirm unit.
+};
+typedef unsigned int tvm_hexagon_remote_nd_handle_t;  // Module/symbol handle.
+typedef uint64 tvm_hexagon_remote_nd_scalar_t;
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_open)(void)
+    __QAIC_HEADER_ATTRIBUTE;  // Takes no URI and yields no handle.
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_close)(void)
+    __QAIC_HEADER_ATTRIBUTE;  // Takes no handle argument.
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_load_library)(
+    const char* soname, int sonameLen, const char* code, int codeLen,
+    tvm_hexagon_remote_nd_handle_t* module_ptr) __QAIC_HEADER_ATTRIBUTE;
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_get_symbol)(
+    tvm_hexagon_remote_nd_handle_t module_ptr, const char* name, int nameLen,
+    tvm_hexagon_remote_nd_handle_t* sym_ptr) __QAIC_HEADER_ATTRIBUTE;
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_kernel)(
+    tvm_hexagon_remote_nd_handle_t module_ptr,
+    tvm_hexagon_remote_nd_handle_t symbol, int* scalar, int scalarLen,
+    int* stack, int stackLen,  // Each array is passed with its own length.
+    const tvm_hexagon_remote_nd_buffer* scalar_in_octet,
+    int scalar_in_octetLen, tvm_hexagon_remote_nd_buffer* scalar_out_octet,
+    int scalar_out_octetLen,
+    const tvm_hexagon_remote_nd_buffer* stack_in_octet, int stack_in_octetLen,
+    tvm_hexagon_remote_nd_buffer* stack_out_octet, int stack_out_octetLen,
+    uint64* pcycles, uint64* time_usec) __QAIC_HEADER_ATTRIBUTE;
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_release_library)(
+    tvm_hexagon_remote_nd_handle_t module_ptr) __QAIC_HEADER_ATTRIBUTE;
+__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_call_mmap64)(void)
+    __QAIC_HEADER_ATTRIBUTE;  // Takes no handle argument.
+#ifdef __cplusplus
+}
+#endif
+#endif  // TVM_RUNTIME_HEXAGON_TARGET_FASTRPC_TVM_HEXAGON_REMOTE_ND_H_
diff --git a/src/runtime/hexagon/target/hexagon_device_target.cc b/src/runtime/hexagon/target/hexagon_device_target.cc
new file mode 100644
index 000000000000..9d688c1b3d12
--- /dev/null
+++ b/src/runtime/hexagon/target/hexagon_device_target.cc
@@ -0,0 +1,525 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifdef __ANDROID__
+
+#include 
+
+#include <map>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <utility>
+
+#include "../hexagon_module.h"
+#include "AEEStdErr.h"
+#include "fastrpc/tvm_hexagon_remote.h"
+#include "hexagon_dsprpcapi.h"
+#include "hexagon_stubapi.h"
+#include "hexagon_target_log.h"
+#include "remote64.h"
+#include "rpcmem.h"
+
+#pragma weak remote_session_control  // OpenDomainChannel copes with absence.
+
+#define RPCMEM_HEAP 25  // Heap id passed to rpcmem_alloc in Alloc().
+
+// All log messages start with "HexagonTarget::%s", where %s is replaced
+// with the function name, so create macros that add that to avoid repetition.
+// The downside is that the format string must be given as a string literal,
+// but it seems to be a minor issue.
+#define VA_EXPANDER(...) , ##__VA_ARGS__  // ", args", or nothing if no args.
+#define TVM_LOGD_HT(fmt, ...) \
+  TVM_LOGD("HexagonTarget::%s: " fmt, __func__ VA_EXPANDER(__VA_ARGS__))
+#define TVM_LOGE_HT(fmt, ...) \
+  TVM_LOGE("HexagonTarget::%s: " fmt, __func__ VA_EXPANDER(__VA_ARGS__))
+
+namespace tvm {
+namespace runtime {
+namespace hexagon {
+
+static constexpr int kStackSize = 128 * 1024;  // 128kB stack
+
+class HexagonTarget : public tvm::runtime::hexagon::Device {
+ public:
+  HexagonTarget() {}
+  ~HexagonTarget() final {}
+  void* Alloc(unsigned size, unsigned align) final;  // Returns a DSP address.
+  void Free(void* ptr) final;
+  void* AllocVtcm(unsigned size, unsigned align) final;
+  void FreeVtcm(void* ptr) final;
+  void CopyDeviceToDevice(void* dst, const void* src, unsigned len) final;
+  void CopyDeviceToHost(void* host_dst, const void* src, unsigned len) final;
+  void CopyHostToDevice(void* dst, const void* host_src, unsigned len) final;
+  void* Load(const std::string& data, const std::string& fmt) final;
+  void Unload(void* mod) final;
+  void* Resolve(const std::string& sym) final;
+  void Call(void* func, uint32_t* scalar, unsigned scalar_num, uint32_t* stack,
+            unsigned stack_num) final;
+
+ private:
+  std::pair<void*, size_t> AddAddrMapping(const void* dsp_addr,
+                                          void* apps_addr, size_t size);
+  std::pair<void*, size_t> GetAppsAddr(const void* dsp_addr, bool exact) const;
+  void RemoveAddrMapping(const void* dsp_addr);
+  int OpenDomainChannel(bool set_unsigned_pd);
+  int CloseDomainChannel();
+  void ReleaseLibrary();
+  void FreeMemoryBeforeChannelClose();
+
+  // Mapping from a DSP address to a pair <apps address, buffer size>.
+  // Using void* pointers is ok, since DSP pointers will always fit
+  // in apps's pointers, i.e. sizeof_dsp(void*) <= sizeof_apps(void*).
+  std::map<const void*, std::pair<void*, size_t>> dsp_to_apps_;
+  std::map<const void*, std::pair<void*, size_t>> vtcm_addr_;  // Type assumed.
+  remote_handle64 domain_channel_handle_ = AEE_EUNKNOWN;
+  tvm_hexagon_remote_handle_t module_pointer_ = AEE_EUNKNOWN;
+  uint64_t count_channel_open_ = 0;
+  // Global lock, used for all critical sections. This can be refined
+  // in the future.
+  mutable std::mutex crit_section_;
+};
+
+std::shared_ptr<Device> CreateHexagonTarget() {
+  return std::make_shared<HexagonTarget>();  // Upcast to the Device base.
+}
+
+std::pair<void*, size_t> HexagonTarget::AddAddrMapping(const void* dsp_addr,
+                                                       void* apps_addr,
+                                                       size_t size) {
+  // Hold the lock to the end: the returned entry is read from the map.
+  std::lock_guard<std::mutex> guard(crit_section_);
+  auto p = dsp_to_apps_.insert({dsp_addr, {apps_addr, size}});
+  if (!p.second) {
+    TVM_LOGE_HT(
+        "failed to insert address mapping: dsp:%p -> apps:%p, size:%zu",
+        dsp_addr, apps_addr, size);
+    return std::make_pair(nullptr, 0);
+  }
+  TVM_LOGD_HT("added address mapping: dsp:%p -> apps:%p, size:%zu", dsp_addr,
+              apps_addr, size);
+  return p.first->second;
+}
+
+void HexagonTarget::RemoveAddrMapping(const void* dsp_addr) {
+  // Scoped guard releases the global lock on every return path.
+  std::lock_guard<std::mutex> guard(crit_section_);
+  auto entry = dsp_to_apps_.find(dsp_addr);
+  if (entry != dsp_to_apps_.end()) {
+    dsp_to_apps_.erase(entry);
+    return;
+  }
+  // Nothing to erase: report the unknown address.
+  TVM_LOGE_HT("failed to remove address mapping for dsp:%p", dsp_addr);
+}
+
+std::pair<void*, size_t> HexagonTarget::GetAppsAddr(const void* dsp_addr,
+                                                    bool exact) const {
+  // Looks up the apps-side <address, size> for dsp_addr. With exact == false
+  // an address inside a mapped buffer is accepted too, and the returned size
+  // is the number of bytes left from that address to the end of the buffer.
+  // Returns <nullptr, 0> when nothing matches.
+  std::lock_guard<std::mutex> guard(crit_section_);
+
+  // If the address is in the map, simply return the result.
+  auto f = dsp_to_apps_.find(dsp_addr);
+  if (f != dsp_to_apps_.end()) return f->second;
+  // If exact mapping is requested, then it hasn't been found.
+  if (exact) return std::make_pair(nullptr, 0);
+
+  // If the address is not in the map, maybe it points to somewhere in the
+  // interior of a mapped buffer.
+  uintptr_t dsp_v = reinterpret_cast<uintptr_t>(dsp_addr);
+  for (const auto& v : dsp_to_apps_) {
+    uintptr_t dsp_k = reinterpret_cast<uintptr_t>(v.first);
+    size_t size = v.second.second;
+    if (dsp_v >= dsp_k && dsp_v < dsp_k + size) {
+      size_t offset = dsp_v - dsp_k;
+      // VTCM buffers store the ~0 marker, not a real address: keep it intact.
+      void* apps_base = v.second.first;
+      if (apps_base != reinterpret_cast<void*>(~0)) {
+        apps_base = static_cast<char*>(apps_base) + offset;
+      }
+      return std::make_pair(apps_base, size - offset);
+    }
+  }
+  TVM_LOGE_HT("failed to locate apps address for dsp:%p", dsp_addr);
+  return std::make_pair(nullptr, 0);
+}
+
+int HexagonTarget::OpenDomainChannel(bool use_unsigned_pd) {
+  // Nothing to do when a channel handle has already been obtained.
+  if (domain_channel_handle_ != AEE_EUNKNOWN) return AEE_SUCCESS;
+
+  const DspRpcAPI* dsp_api = DspRpcAPI::Global();
+  const StubAPI* stub_api = StubAPI::Global();
+
+  stub_api->rpcmem_init_ptr()();
+
+  auto* session_control = dsp_api->remote_session_control_ptr(true);
+  if (session_control == nullptr) {
+    TVM_LOGD_HT("remote_session_control not available");
+  } else {
+    // Pass the thread parameters (stack size, default priority).
+    remote_rpc_thread_params thread_params;
+    thread_params.domain = CDSP_DOMAIN_ID;
+    thread_params.stack_size = kStackSize;
+    thread_params.prio = -1;  // Default priority.
+    int params_rc = session_control(FASTRPC_THREAD_PARAMS, &thread_params,
+                                    sizeof(thread_params));
+    if (params_rc != AEE_SUCCESS) {
+      TVM_LOGE_HT("remote_session_control failed rc=%08x for stack size",
+                  params_rc);
+    }
+    if (use_unsigned_pd) {
+      remote_rpc_control_unsigned_module unsigned_ctl;
+      unsigned_ctl.enable = 1;
+      unsigned_ctl.domain = CDSP_DOMAIN_ID;
+      int unsigned_rc = session_control(DSPRPC_CONTROL_UNSIGNED_MODULE,
+                                        &unsigned_ctl, sizeof(unsigned_ctl));
+      if (unsigned_rc != AEE_SUCCESS) {
+        TVM_LOGE_HT("remote_session_control failed rc=%08x for unsigned PD",
+                    unsigned_rc);
+      }
+    }
+  }
+
+  int open_rc = stub_api->tvm_hexagon_remote_open(
+      tvm_hexagon_remote_URI "&_dom=cdsp", &domain_channel_handle_);
+  if (open_rc == AEE_SUCCESS) {
+    count_channel_open_++;
+    TVM_LOGD_HT("channel open success and rpcmem_init done");
+  } else {
+    TVM_LOGE_HT("failed to open channel rc=0x%x", open_rc);
+  }
+  return open_rc;
+}
+
+int HexagonTarget::CloseDomainChannel() {
+  // No handle means there is no channel to close.
+  if (domain_channel_handle_ == AEE_EUNKNOWN) return AEE_SUCCESS;
+  const StubAPI* stub_api = StubAPI::Global();
+  const int close_rc =
+      stub_api->tvm_hexagon_remote_close(domain_channel_handle_);
+  if (close_rc != AEE_SUCCESS) {
+    TVM_LOGE_HT("failed to close domain channel rc=0x%x", close_rc);
+    return close_rc;
+  }
+  domain_channel_handle_ = AEE_EUNKNOWN;
+  stub_api->rpcmem_deinit_ptr()();
+  TVM_LOGD_HT("channel close success and rpcmem_deinit done");
+  return close_rc;
+}
+
+void HexagonTarget::ReleaseLibrary() {
+  // The guard keeps the global lock for the whole release attempt.
+  std::lock_guard<std::mutex> guard(crit_section_);
+  if (module_pointer_ == AEE_EUNKNOWN) return;
+
+  const StubAPI* stub_api = StubAPI::Global();
+  const int release_rc = stub_api->tvm_hexagon_remote_release_library(
+      domain_channel_handle_, module_pointer_);
+  if (release_rc == AEE_SUCCESS) {
+    module_pointer_ = AEE_EUNKNOWN;  // Mark the module as released.
+  } else {
+    TVM_LOGE_HT("failed to unload device library rc=0x%x", release_rc);
+  }
+}
+
+void HexagonTarget::FreeMemoryBeforeChannelClose() {
+  while (!dsp_to_apps_.empty()) {  // Free() erases the entry it is given.
+    void* dsp_addr = const_cast<void*>((dsp_to_apps_.begin()->first));
+    TVM_LOGD_HT("Freeing up dsp_addr %p", dsp_addr);
+    HexagonTarget::Free(dsp_addr);
+  }
+}
+void* HexagonTarget::Alloc(unsigned size, unsigned align) {
+  const DspRpcAPI* dsp_api = DspRpcAPI::Global();
+  const StubAPI* stub_api = StubAPI::Global();
+
+  // Opening the domain channel should be done once.
+  crit_section_.lock();
+  int rc_oc = OpenDomainChannel(/*use_unsigned_pd*/ true);
+  crit_section_.unlock();
+  if (rc_oc != AEE_SUCCESS) {
+    TVM_LOGE_HT("mem alloc failed: unable to open domain channel");
+    return nullptr;
+  }
+
+  // Workaround: remote_mmap64 fails if HexagonTarget::Alloc is called from a
+  // different thread, because FastRPC expects one call to the DSP to be made
+  // first. Keep this call until FastRPC provides a fix.
+  int rc_call_mmap_64 =
+      stub_api->tvm_hexagon_remote_call_mmap64(domain_channel_handle_);
+  if (rc_call_mmap_64 != AEE_SUCCESS) {
+    TVM_LOGE_HT("mmap64 failed for domain channel %lu", domain_channel_handle_);
+    return nullptr;
+  }
+
+  void* mem =
+      stub_api->rpcmem_alloc_ptr()(RPCMEM_HEAP, RPCMEM_DEFAULT_FLAGS, size);
+  if (mem == nullptr) {
+    TVM_LOGE_HT("mem alloc failed for size=0x%x alignment=0x%x", size, align);
+    return nullptr;
+  }
+  int mem_fd = stub_api->rpcmem_to_fd_ptr()(mem);
+  uintptr_t dsp_va = 0;
+  int rc = dsp_api->remote_mmap64_ptr()(
+      mem_fd, 0, reinterpret_cast<uintptr_t>(mem), size, &dsp_va);
+  if (rc != AEE_SUCCESS) {
+    TVM_LOGE_HT(
+        "buffer mapping failed for remote_map64 fd=0x%x rc=0x%x "
+        "apps_addr=0x%lx",
+        mem_fd, rc, reinterpret_cast<uintptr_t>(mem));
+    stub_api->rpcmem_free_ptr()(mem);  // Do not leak the unmapped buffer.
+    return nullptr;
+  }
+
+  void* dsp_addr = reinterpret_cast<void*>(dsp_va);
+  AddAddrMapping(dsp_addr, mem, size);  // Lets Free() find `mem` again.
+  return dsp_addr;
+}
+
+void HexagonTarget::Free(void* ptr) {
+  const DspRpcAPI* dsp_api = DspRpcAPI::Global();
+  const StubAPI* stub_api = StubAPI::Global();
+  // One exact lookup serves both the VTCM check and the regular path.
+  auto aa = GetAppsAddr(ptr, true);
+  if (aa.first == reinterpret_cast<void*>(~0)) {
+    TVM_LOGD_HT("VTCM mapping found. dsp_addr=0x%p", ptr);
+    RemoveAddrMapping(ptr);
+    FreeVtcm(ptr);
+    return;
+  }
+
+  TVM_LOGD_HT("VTCM mapping not found. dsp_addr=0x%p", ptr);
+  if (aa.first == nullptr) return;  // Unknown pointer: nothing to free.
+
+  int rc = dsp_api->remote_munmap64_ptr()(reinterpret_cast<uintptr_t>(ptr),
+                                          aa.second);
+  if (rc != AEE_SUCCESS) {
+    TVM_LOGE_HT("buffer unmapping failed rc=0x%x", rc);
+  }
+  RemoveAddrMapping(ptr);
+  stub_api->rpcmem_free_ptr()(aa.first);
+}
+
+void* HexagonTarget::AllocVtcm(unsigned size, unsigned align) {
+  const StubAPI* stub_api = StubAPI::Global();
+
+  unsigned int dsp_va = 0;
+  int rc = stub_api->tvm_hexagon_remote_alloc_vtcm(domain_channel_handle_,
+                                                   size, align, &dsp_va);
+  if (rc != AEE_SUCCESS) {
+    TVM_LOGE_HT("VTCM allocation failed size=%u, align=%u", size, align);
+    return nullptr;
+  }
+  void* dsp_addr = reinterpret_cast<void*>(static_cast<uintptr_t>(dsp_va));
+  TVM_LOGD_HT("Done vtcm alloc dsp:%p", dsp_addr);
+  AddAddrMapping(dsp_addr, reinterpret_cast<void*>(~0), size);  // ~0 == VTCM.
+  return dsp_addr;
+}
+
+// Asks the remote side to release the VTCM buffer at device address `ptr`.
+// A failure is only logged. This function does not touch the address map;
+// Free() removes the mapping before calling it.
+void HexagonTarget::FreeVtcm(void* ptr) {
+  const StubAPI* stub_api = StubAPI::Global();
+
+  TVM_LOGD_HT("%s:Calling vtcm free. ptr=%p", __func__, ptr);
+  uintptr_t dsp_va = reinterpret_cast(ptr);
+  int rc =
+      stub_api->tvm_hexagon_remote_free_vtcm(domain_channel_handle_, dsp_va);
+  if (rc != AEE_SUCCESS) {
+    TVM_LOGE_HT("VTCM deallocation failed");
+  }
+  TVM_LOGD_HT("Done VTCM free from HexagonTarget::FreeVtcm");
+}
+
+// Device-to-device copy. The body is empty: no data is copied and no error
+// is reported.
+void HexagonTarget::CopyDeviceToDevice(void* dst, const void* src,
+                                       unsigned len) {}
+
+// Copies `len` bytes from the device buffer `src` into `host_dst`.
+// The copy is a host-side memcpy from the apps address recorded for `src`.
+// Nothing is copied for VTCM buffers (all-ones apps address) or for
+// addresses that have no mapping; the latter is logged as an error.
+void HexagonTarget::CopyDeviceToHost(void* host_dst, const void* src,
+                                     unsigned len) {
+  auto aa = GetAppsAddr(src, false);
+  if (aa.first == reinterpret_cast(~0)) {
+    TVM_LOGD_HT("VTCM address. Copy operation not needed");
+    return;
+  }
+  if (!aa.first) {
+    TVM_LOGE_HT("copy failed, dsp:%p -> apps:%p, len:%u", src, host_dst, len);
+    return;
+  }
+  // Clamp the request to the size recorded for the buffer.
+  if (aa.second < len) {
+    TVM_LOGE_HT(
+        "specified length:%u larger than buffer size:%zu, copy truncated", len,
+        aa.second);
+    len = aa.second;
+  }
+  TVM_LOGD_HT("copy, dsp:%p(apps:%p) -> apps:%p, len:%u", src, aa.first,
+              host_dst, len);
+  std::memcpy(host_dst, aa.first, len);
+}
+
+// Copies `len` bytes from `host_src` into the device buffer `dst`.
+// The copy is a host-side memcpy into the apps address recorded for `dst`.
+// Nothing is copied for VTCM buffers (all-ones apps address) or for
+// addresses that have no mapping; the latter is logged as an error.
+void HexagonTarget::CopyHostToDevice(void* dst, const void* host_src,
+                                     unsigned len) {
+  auto aa = GetAppsAddr(dst, false);
+  if (aa.first == reinterpret_cast(~0)) {
+    TVM_LOGD_HT("VTCM address. Copy operation not needed");
+    return;
+  }
+  if (!aa.first) {
+    TVM_LOGE_HT("copy failed, dsp:%p <- apps:%p, len:%u", dst, host_src, len);
+    return;
+  }
+  // Clamp the request to the size recorded for the buffer.
+  if (aa.second < len) {
+    TVM_LOGE_HT(
+        "specified length:%u larger than buffer size:%zu, copy truncated", len,
+        aa.second);
+    len = aa.second;
+  }
+  TVM_LOGD_HT("copy, dsp:%p(apps:%p) <- apps:%p, len:%u", dst, aa.first,
+              host_src, len);
+  std::memcpy(aa.first, host_src, len);
+}
+
+// Opens the domain channel and loads the device library named by `data`.
+// Returns the module handle as a pointer, or nullptr if the channel could
+// not be opened or module_pointer_ is AEE_EUNKNOWN after the load attempt.
+// `fmt` is not used in this function.
+void* HexagonTarget::Load(const std::string& data, const std::string& fmt) {
+  crit_section_.lock();
+  int rc_oc = OpenDomainChannel(/*use_unsigned_pd*/ true);
+  crit_section_.unlock();
+  if (rc_oc != AEE_SUCCESS) {
+    TVM_LOGE_HT("loading of %s failed: unable to open domain channel",
+                data.c_str());
+    return nullptr;
+  }
+
+  if (domain_channel_handle_ == AEE_EUNKNOWN) return nullptr;
+  // Release whatever library is currently loaded before loading a new one.
+  ReleaseLibrary();
+
+  crit_section_.lock();
+  TVM_LOGD_HT("loading library %s ", data.c_str());
+  const StubAPI* stub_api = StubAPI::Global();
+  // The same string (including its terminating NUL) is passed for both
+  // string parameters of the remote call.
+  int rc = stub_api->tvm_hexagon_remote_load_library(
+      domain_channel_handle_, data.c_str(), data.size() + 1, data.c_str(),
+      data.size() + 1, &module_pointer_);
+  if (rc != AEE_SUCCESS) {
+    // A failed load is only logged; the result is decided by the value of
+    // module_pointer_ below.
+    TVM_LOGE_HT("failed to load device library rc=0x%x", rc);
+  }
+  crit_section_.unlock();
+
+  if (module_pointer_ != AEE_EUNKNOWN) {
+    return reinterpret_cast(module_pointer_);
+  } else {
+    return nullptr;
+  }
+}
+
+// Drops one reference to the open channel, releases the loaded library and,
+// once the reference count reaches zero, frees remaining device memory and
+// closes the domain channel. `mod` is not used in this function.
+void HexagonTarget::Unload(void* mod) {
+  crit_section_.lock();
+  count_channel_open_--;
+  crit_section_.unlock();
+  // NOTE(review): count_channel_open_ is read here after the lock has been
+  // released -- confirm that concurrent Load/Unload calls cannot interleave.
+  if (count_channel_open_ == 0) FreeMemoryBeforeChannelClose();
+
+  ReleaseLibrary();
+  // Keep the channel open if the library handle was not reset to
+  // AEE_EUNKNOWN by the release above.
+  if (module_pointer_ != AEE_EUNKNOWN) return;
+
+  crit_section_.lock();
+  if (count_channel_open_ == 0) CloseDomainChannel();
+  crit_section_.unlock();
+}
+
+// Looks up symbol `sym` in the currently loaded device library.
+// Returns the remote handle converted to a pointer, or nullptr if the remote
+// lookup fails.
+void* HexagonTarget::Resolve(const std::string& sym) {
+  const StubAPI* stub_api = StubAPI::Global();
+
+  // Written by the remote call; only read when the call succeeds.
+  tvm_hexagon_remote_handle_t pf;
+  TVM_LOGD_HT("resolving symbol %s", sym.c_str());
+  int rc = stub_api->tvm_hexagon_remote_get_symbol(
+      domain_channel_handle_, module_pointer_, sym.c_str(), sym.size() + 1,
+      &pf);
+  if (rc != AEE_SUCCESS) {
+    TVM_LOGE_HT("failed to get symbol from CDSP rc=0x%x", rc);
+    return nullptr;
+  }
+  void* addr = reinterpret_cast(pf);
+  TVM_LOGD_HT("resolved %s -> %p", sym.c_str(), addr);
+  return addr;
+}
+
+// Runs the remote function `func` with `scalar_num` values from `scalar` and
+// `stack_num` values from `stack`.
+// For every argument value that matches a mapped device address, the
+// corresponding apps-side buffer is passed along in a
+// tvm_hexagon_remote_buffer entry. Failures are logged; nothing is returned.
+void HexagonTarget::Call(void* func, uint32_t* scalar, unsigned scalar_num,
+                         uint32_t* stack, unsigned stack_num) {
+  uint64 pcycles = 0, execution_time_usec = 0;
+  auto scalar_octet = std::unique_ptr(
+      new tvm_hexagon_remote_buffer[scalar_num]);
+  auto stack_octet = std::unique_ptr(
+      new tvm_hexagon_remote_buffer[stack_num]);
+  TVM_LOGD_HT("scalars=%p, stack=%p", scalar, stack);
+
+  // NOTE(review): a plain new[] expression reports failure by throwing, so
+  // this branch is presumably never taken -- confirm and simplify.
+  if (scalar_octet == nullptr || stack_octet == nullptr) {
+    TVM_LOGE_HT("mem alloc failed for scalar/stack octets");
+    return;
+  }
+  // Zero all entries so that arguments without a mapping keep a null buffer.
+  std::memset(scalar_octet.get(), 0,
+              scalar_num * sizeof(tvm_hexagon_remote_buffer));
+  std::memset(stack_octet.get(), 0,
+              stack_num * sizeof(tvm_hexagon_remote_buffer));
+
+  // Fills buffers[i] for each inputs[i]: VTCM addresses get an explicit
+  // empty buffer, mapped addresses get their apps-side pointer and size, and
+  // everything else is left as zeroed above.
+  auto ProcessInputs = [this](uint32_t* inputs,
+                              tvm_hexagon_remote_buffer* buffers,
+                              unsigned num) {
+    for (unsigned i = 0; i != num; ++i) {
+      void* ptr = reinterpret_cast(static_cast(inputs[i]));
+      auto aa = GetAppsAddr(ptr, false);
+      if (aa.first == reinterpret_cast(~0)) {
+        buffers[i].data = nullptr;
+        buffers[i].dataLen = 0;
+      } else if (aa.first) {
+        buffers[i].data = static_cast(aa.first);
+        buffers[i].dataLen = aa.second;
+      }
+    }
+  };
+
+  ProcessInputs(scalar, scalar_octet.get(), scalar_num);
+  ProcessInputs(stack, stack_octet.get(), stack_num);
+
+  // Formats `num` values from `data` in hex for the debug log.
+  auto ToString = [](const char* title, uint32_t* data, unsigned num) {
+    std::ostringstream log;
+    log << "  " << title << ':' << num << " {" << std::hex;
+    for (unsigned i = 0; i != num; ++i) log << ' ' << data[i];
+    log << " }";
+    return log.str();
+  };
+
+  TVM_LOGD_HT("%s", ToString("scalars", scalar, scalar_num).c_str());
+  TVM_LOGD_HT("%s", ToString("  stack", stack, stack_num).c_str());
+
+  const StubAPI* stub_api = StubAPI::Global();
+  // Each buffer array is passed twice to the remote call.
+  int rc = stub_api->tvm_hexagon_remote_kernel(
+      domain_channel_handle_, module_pointer_,
+      static_cast(
+          reinterpret_cast(func)),
+      reinterpret_cast(scalar), scalar_num,
+      reinterpret_cast(stack), stack_num, scalar_octet.get(), scalar_num,
+      scalar_octet.get(), scalar_num, stack_octet.get(), stack_num,
+      stack_octet.get(), stack_num, &pcycles, &execution_time_usec);
+
+  if (rc != AEE_SUCCESS) {
+    TVM_LOGE_HT("failed to run kernel on CDSP rc=0x%x", rc);
+  } else {
+    TVM_LOGD_HT("kernel execution: %llu pcycles, %llu usec, scalar_num=%d",
+                pcycles, execution_time_usec, scalar_num);
+  }
+}
+
+}  // namespace hexagon
+}  // namespace runtime
+}  // namespace tvm
+
+#endif  // #ifdef __ANDROID__
diff --git a/src/runtime/hexagon/target/hexagon_dsprpcapi.cc b/src/runtime/hexagon/target/hexagon_dsprpcapi.cc
new file mode 100644
index 000000000000..bf10feb652cd
--- /dev/null
+++ b/src/runtime/hexagon/target/hexagon_dsprpcapi.cc
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifdef __ANDROID__
+#include "hexagon_dsprpcapi.h"
+
+#include 
+#include 
+#include 
+
+#include "hexagon_target_log.h"
+
+namespace tvm {
+namespace runtime {
+
+namespace hexagon {
+
+// Opens the DSP RPC library and looks up every function listed in the class
+// by name. The CHECK fails if the library cannot be opened. A function that
+// is missing from the library leaves its pointer member null; the fn_ptr()
+// accessors decide whether a null pointer is acceptable to the caller.
+DspRpcAPI::DspRpcAPI() {
+  CHECK(lib_handle_ = dlopen(rpc_lib_name_, RTLD_LAZY | RTLD_LOCAL));
+
+  // The pointer type is spelled out because GetSymbol's template parameter
+  // appears only in its return type and therefore cannot be deduced.
+#define RESOLVE(n) n##_ = GetSymbol<n##_t*>(#n)
+  RESOLVE(remote_handle_close);
+  RESOLVE(remote_handle_control);
+  RESOLVE(remote_handle_invoke);
+  RESOLVE(remote_handle_open);
+  RESOLVE(remote_mmap);
+  RESOLVE(remote_munmap);
+
+  RESOLVE(remote_handle64_close);
+  RESOLVE(remote_handle64_control);
+  RESOLVE(remote_handle64_invoke);
+  RESOLVE(remote_handle64_open);
+  RESOLVE(remote_mmap64);
+  RESOLVE(remote_munmap64);
+
+  RESOLVE(remote_register_buf);
+  RESOLVE(remote_register_buf_attr);
+  RESOLVE(remote_register_dma_handle);
+  RESOLVE(remote_register_dma_handle_attr);
+  RESOLVE(remote_register_fd);
+
+  RESOLVE(remote_session_control);
+  RESOLVE(remote_set_mode);
+
+  RESOLVE(rpcmem_init);
+  RESOLVE(rpcmem_deinit);
+  RESOLVE(rpcmem_alloc);
+  RESOLVE(rpcmem_free);
+  RESOLVE(rpcmem_to_fd);
+#undef RESOLVE
+}
+
+// Closes the DSP RPC library if a handle to it is held.
+DspRpcAPI::~DspRpcAPI() {
+  if (lib_handle_ != nullptr) {
+    dlclose(lib_handle_);
+  }
+}
+
+// Looks up `sym` in the loaded library and returns its address converted to
+// the pointer type T. Returns nullptr when the library is not loaded or the
+// symbol cannot be found. A missing symbol is logged at debug level only.
+template <typename T>
+T DspRpcAPI::GetSymbol(const char* sym) {
+  if (!lib_handle_) {
+    TVM_LOGE("error looking up symbol \"%s\": library not loaded", sym);
+    return nullptr;
+  }
+  dlerror();  // Clear any previous error conditions.
+  if (T ret = reinterpret_cast<T>(dlsym(lib_handle_, sym))) {
+    return ret;
+  }
+
+  // dlerror() may return null; fall back to a generic message in that case.
+  const char* err = dlerror();
+  const char* err_txt = err ? err : "symbol not found";
+  TVM_LOGD("error looking up symbol \"%s\": %s", sym, err_txt);
+  return nullptr;
+}
+
+// Returns the shared DspRpcAPI object, a function-local static that is
+// constructed the first time this function runs.
+const DspRpcAPI* DspRpcAPI::Global() {
+  static const DspRpcAPI instance;
+  return &instance;
+}
+
+}  // namespace hexagon
+
+}  // namespace runtime
+}  // namespace tvm
+
+#endif  // __ANDROID__
diff --git a/src/runtime/hexagon/target/hexagon_dsprpcapi.h b/src/runtime/hexagon/target/hexagon_dsprpcapi.h
new file mode 100644
index 000000000000..ca812e6c2f1f
--- /dev/null
+++ b/src/runtime/hexagon/target/hexagon_dsprpcapi.h
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TVM_RUNTIME_HEXAGON_TARGET_HEXAGON_DSPRPCAPI_H_
+#define TVM_RUNTIME_HEXAGON_TARGET_HEXAGON_DSPRPCAPI_H_
+
+#ifdef __ANDROID__
+#include 
+#include 
+
+#include "remote.h"
+#include "remote64.h"
+#include "rpcmem.h"
+
+namespace tvm {
+namespace runtime {
+
+namespace hexagon {
+
+/*!
+ * Encapsulation of the API of lib(a|c)dsprpc.so (loaded via dlopen), allowing
+ * for having versions of the library that do not implement all of the
+ * functions.
+ *
+ * Functions defined in the DSP RPC library:
+ *   remote_handle_close
+ *   remote_handle_control
+ *   remote_handle_invoke
+ *   remote_handle_open
+ *   remote_mmap
+ *   remote_munmap
+ *
+ *   remote_handle64_close
+ *   remote_handle64_control
+ *   remote_handle64_invoke
+ *   remote_handle64_open
+ *   remote_mmap64
+ *   remote_munmap64
+ *
+ *   remote_register_buf
+ *   remote_register_buf_attr
+ *   remote_register_dma_handle
+ *   remote_register_dma_handle_attr
+ *   remote_register_fd
+ *
+ *   remote_session_control
+ *   remote_set_mode
+ *
+ *   rpcmem_init
+ *   rpcmem_deinit
+ *   rpcmem_alloc
+ *   rpcmem_free
+ *   rpcmem_to_fd
+ */
+class DspRpcAPI {
+ public:
+  DspRpcAPI();
+  ~DspRpcAPI();
+
+  using remote_handle = ::remote_handle;
+  using remote_handle64 = ::remote_handle64;
+
+  // For each library function "fn", define fn_t as its function type, taken
+  // from the declaration in the included headers.
+#define DECLTYPE(ty) using ty##_t = decltype(::ty);
+  DECLTYPE(remote_handle_close)
+  DECLTYPE(remote_handle_control)
+  DECLTYPE(remote_handle_invoke)
+  DECLTYPE(remote_handle_open)
+  DECLTYPE(remote_mmap)
+  DECLTYPE(remote_munmap)
+
+  DECLTYPE(remote_handle64_close)
+  DECLTYPE(remote_handle64_control)
+  DECLTYPE(remote_handle64_invoke)
+  DECLTYPE(remote_handle64_open)
+  DECLTYPE(remote_mmap64)
+  DECLTYPE(remote_munmap64)
+
+  DECLTYPE(remote_register_buf)
+  DECLTYPE(remote_register_buf_attr)
+  DECLTYPE(remote_register_dma_handle)
+  DECLTYPE(remote_register_dma_handle_attr)
+  DECLTYPE(remote_register_fd)
+
+  DECLTYPE(remote_session_control)
+  DECLTYPE(remote_set_mode)
+
+  DECLTYPE(rpcmem_init)
+  DECLTYPE(rpcmem_deinit)
+  DECLTYPE(rpcmem_alloc)
+  DECLTYPE(rpcmem_free)
+  DECLTYPE(rpcmem_to_fd)
+#undef DECLTYPE
+
+  // For each library function "fn", define the accessor fn_ptr() returning
+  // the pointer resolved by the constructor. Unless allow_nullptr is true,
+  // the accessor CHECKs that the pointer is not null.
+#define DECLFUNC(fn)                                   \
+  fn##_t* fn##_ptr(bool allow_nullptr = false) const { \
+    if (!allow_nullptr) CHECK(fn##_ != nullptr);       \
+    return fn##_;                                      \
+  }
+  DECLFUNC(remote_handle_close)
+  DECLFUNC(remote_handle_control)
+  DECLFUNC(remote_handle_invoke)
+  DECLFUNC(remote_handle_open)
+  DECLFUNC(remote_mmap)
+  DECLFUNC(remote_munmap)
+
+  DECLFUNC(remote_handle64_close)
+  DECLFUNC(remote_handle64_control)
+  DECLFUNC(remote_handle64_invoke)
+  DECLFUNC(remote_handle64_open)
+  DECLFUNC(remote_mmap64)
+  DECLFUNC(remote_munmap64)
+
+  DECLFUNC(remote_register_buf)
+  DECLFUNC(remote_register_buf_attr)
+  DECLFUNC(remote_register_dma_handle)
+  DECLFUNC(remote_register_dma_handle_attr)
+  DECLFUNC(remote_register_fd)
+
+  DECLFUNC(remote_session_control)
+  DECLFUNC(remote_set_mode)
+
+  DECLFUNC(rpcmem_init)
+  DECLFUNC(rpcmem_deinit)
+  DECLFUNC(rpcmem_alloc)
+  DECLFUNC(rpcmem_free)
+  DECLFUNC(rpcmem_to_fd)
+#undef DECLFUNC
+
+  // Returns the shared instance.
+  static const DspRpcAPI* Global();
+
+ private:
+  // Name of the library passed to dlopen by the constructor.
+  static constexpr const char* rpc_lib_name_ = "libadsprpc.so";
+  // Handle returned by dlopen; null when no library is loaded.
+  void* lib_handle_ = nullptr;
+
+  // For each library function "fn", the member fn_ holds its address, or
+  // null if it has not been resolved.
+#define DECLPTR(p) p##_t* p##_ = nullptr;
+  DECLPTR(remote_handle_close)
+  DECLPTR(remote_handle_control)
+  DECLPTR(remote_handle_invoke)
+  DECLPTR(remote_handle_open)
+  DECLPTR(remote_mmap)
+  DECLPTR(remote_munmap)
+
+  DECLPTR(remote_handle64_close)
+  DECLPTR(remote_handle64_control)
+  DECLPTR(remote_handle64_invoke)
+  DECLPTR(remote_handle64_open)
+  DECLPTR(remote_mmap64)
+  DECLPTR(remote_munmap64)
+
+  DECLPTR(remote_register_buf)
+  DECLPTR(remote_register_buf_attr)
+  DECLPTR(remote_register_dma_handle)
+  DECLPTR(remote_register_dma_handle_attr)
+  DECLPTR(remote_register_fd)
+
+  DECLPTR(remote_session_control)
+  DECLPTR(remote_set_mode)
+
+  DECLPTR(rpcmem_init)
+  DECLPTR(rpcmem_deinit)
+  DECLPTR(rpcmem_alloc)
+  DECLPTR(rpcmem_free)
+  DECLPTR(rpcmem_to_fd)
+#undef DECLPTR
+
+  // Looks up `sym` in the loaded library and returns it as the pointer type
+  // T, or nullptr if the lookup fails.
+  template <typename T>
+  T GetSymbol(const char* sym);
+};
+
+}  // namespace hexagon
+
+}  // namespace runtime
+}  // namespace tvm
+
+#endif  // __ANDROID__
+#endif  // TVM_RUNTIME_HEXAGON_TARGET_HEXAGON_DSPRPCAPI_H_
diff --git a/src/runtime/hexagon/target/hexagon_stubapi.cc b/src/runtime/hexagon/target/hexagon_stubapi.cc
new file mode 100644
index 000000000000..3600640e89b7
--- /dev/null
+++ b/src/runtime/hexagon/target/hexagon_stubapi.cc
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifdef __ANDROID__
+#include "hexagon_stubapi.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#include "hexagon_target_log.h"
+
+namespace tvm {
+namespace runtime {
+namespace hexagon {
+
+// Chooses between the domain and the non-domain stub library based on which
+// subsystem device node exists, opens that library (the CHECK fails if it
+// cannot be opened) and resolves the functions it is expected to export.
+StubAPI::StubAPI() {
+  // If neither node exists, enable_domains_ keeps its in-class default.
+  struct stat sb;
+  if (!stat("/dev/subsys_cdsp", &sb)) {
+    enable_domains_ = true;
+    TVM_LOGD("CDSP subsystem present");
+  } else if (!stat("/dev/subsys_adsp", &sb)) {
+    enable_domains_ = false;
+    TVM_LOGD("ADSP subsystem present");
+  }
+
+  constexpr auto domain_lib_name = "libtvm_hexagon_remote_stub.so";
+  constexpr auto nondomain_lib_name = "libtvm_hexagon_remote_nd_stub.so";
+
+  const char* lib_name =
+      enable_domains_ ? domain_lib_name : nondomain_lib_name;
+  CHECK(lib_handle_ = dlopen(lib_name, RTLD_LAZY | RTLD_LOCAL));
+
+  // The pointer type is spelled out because GetSymbol's template parameter
+  // appears only in its return type and therefore cannot be deduced.
+#define RESOLVE(fn) p##fn##_ = GetSymbol<fn##_t*>(#fn)
+  if (enable_domains_) {
+    RESOLVE(tvm_hexagon_remote_load_library);
+    RESOLVE(tvm_hexagon_remote_release_library);
+    RESOLVE(tvm_hexagon_remote_get_symbol);
+    RESOLVE(tvm_hexagon_remote_kernel);
+    RESOLVE(tvm_hexagon_remote_open);
+    RESOLVE(tvm_hexagon_remote_close);
+    RESOLVE(tvm_hexagon_remote_alloc_vtcm);
+    RESOLVE(tvm_hexagon_remote_free_vtcm);
+    RESOLVE(tvm_hexagon_remote_call_mmap64);
+  } else {
+    RESOLVE(tvm_hexagon_remote_nd_load_library);
+    RESOLVE(tvm_hexagon_remote_nd_release_library);
+    RESOLVE(tvm_hexagon_remote_nd_get_symbol);
+    RESOLVE(tvm_hexagon_remote_nd_kernel);
+    RESOLVE(tvm_hexagon_remote_nd_open);
+    // The tvm_hexagon_remote_close() wrapper calls through this pointer in
+    // non-domain mode, so it has to be resolved like the others.
+    RESOLVE(tvm_hexagon_remote_nd_close);
+    RESOLVE(tvm_hexagon_remote_nd_call_mmap64);
+  }
+
+  RESOLVE(rpcmem_init);
+  RESOLVE(rpcmem_deinit);
+  RESOLVE(rpcmem_alloc);
+  RESOLVE(rpcmem_free);
+  RESOLVE(rpcmem_to_fd);
+#undef RESOLVE
+}
+
+// Closes the stub library if a handle to it is held.
+StubAPI::~StubAPI() {
+  if (lib_handle_ != nullptr) {
+    dlclose(lib_handle_);
+  }
+}
+
+// Looks up `sym` in the loaded stub library and returns its address
+// converted to the pointer type T. Returns nullptr, after logging an error,
+// when the library is not loaded or the symbol cannot be found.
+template <typename T>
+T StubAPI::GetSymbol(const char* sym) {
+  if (!lib_handle_) {
+    TVM_LOGE("error looking up symbol \"%s\": library not loaded", sym);
+    return nullptr;
+  }
+  dlerror();  // Clear any previous error conditions.
+  if (T ret = reinterpret_cast<T>(dlsym(lib_handle_, sym))) {
+    return ret;
+  }
+
+  // dlerror() may return null; fall back to a generic message in that case.
+  const char* err = dlerror();
+  const char* err_txt = err ? err : "symbol not found";
+  TVM_LOGE("error looking up symbol \"%s\": %s", sym, err_txt);
+  return nullptr;
+}
+
+// Returns the shared StubAPI object, a function-local static that is
+// constructed the first time this function runs.
+const StubAPI* StubAPI::Global() {
+  static const StubAPI instance;
+  return &instance;
+}
+
+}  // namespace hexagon
+}  // namespace runtime
+}  // namespace tvm
+
+#endif  // __ANDROID__
diff --git a/src/runtime/hexagon/target/hexagon_stubapi.h b/src/runtime/hexagon/target/hexagon_stubapi.h
new file mode 100644
index 000000000000..ef3dcfdbcc79
--- /dev/null
+++ b/src/runtime/hexagon/target/hexagon_stubapi.h
@@ -0,0 +1,318 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TVM_RUNTIME_HEXAGON_TARGET_HEXAGON_STUBAPI_H_
+#define TVM_RUNTIME_HEXAGON_TARGET_HEXAGON_STUBAPI_H_
+
+#ifdef __ANDROID__
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "fastrpc/tvm_hexagon_remote.h"
+#include "fastrpc/tvm_hexagon_remote_nd.h"
+
+namespace tvm {
+namespace runtime {
+namespace hexagon {
+
+/*!
+ * Unify the handling of domain and non-domain functions.
+ *
+ * In most cases, for a function "foo", the domain version will be called
+ * "tvm_hexagon_remote_foo", and the non-domain version will have "nd_foo".
+ * The interfaces will be the same, except:
+ * - the domain version will take "remote_handle64" as the first parameter,
+ *   while the non-domain version will not:
+ *   int tvm_hexagon_remote_foo   (remote_handle64 h, param1, param2, ...);
+ *   int tvm_hexagon_remote_nd_foo                   (param1, param2, ...);
+ * - any parameter of type "buffer" in the IDL, will be converted into a
+ *   type "tvm_hexagon_remote_buffer" for domain functions, and into
+ *   "tvm_hexagon_remote_nd_buffer" for non-domain functions. These two
+ *   types are identical, but since they are declared in two different IDLs,
+ *   they get different names.
+ *
+ * For any function, only a pointer to the "buffer" type is passed, but
+ * since the pointee types are different, this is enough to create a
+ * difference in the function signatures even if the "remote_handle64"
+ * parameter is ignored. For this reason, in all function types, the
+ * types "tvm_hexagon_remote_buffer *" and "tvm_hexagon_remote_nd_buffer *",
+ * both const and non-const, are replaced with "void *", with the
+ * corresponding const-qualification. This is done by the templates
+ * "replace_pointee_type" and "map_tuple_elements" below.
+ *
+ * The following functions are subject to the uniform handling:
+ *
+ *   tvm_hexagon_remote_load_library     (remote_handle64 h, p1, p2, ...)
+ *   tvm_hexagon_remote_release_library
+ *   tvm_hexagon_remote_get_symbol
+ *   tvm_hexagon_remote_kernel
+ *   tvm_hexagon_remote_close
+ *   tvm_hexagon_remote_alloc_vtcm
+ *   tvm_hexagon_remote_free_vtcm
+ *   tvm_hexagon_remote_call_mmap64
+ *
+ *   tvm_hexagon_remote_nd_load_library  (p1, p2, ...)
+ *   tvm_hexagon_remote_nd_release_library
+ *   tvm_hexagon_remote_nd_get_symbol
+ *   tvm_hexagon_remote_nd_kernel
+ *   tvm_hexagon_remote_nd_close
+ *   tvm_hexagon_remote_nd_call_mmap64
+ *
+ * The "open" functions differ in their parameters in different ways, and
+ * need to be handled individually.
+ *
+ *   tvm_hexagon_remote_open
+ *   tvm_hexagon_remote_nd_open
+ */
+
+namespace {
+/*!
+ * replace_pointee_type<T, M, V>
+ *
+ * If T is a pointer to a potentially const-qualified M, then replace
+ * M in T with V. Otherwise, leave T unchanged.
+ */
+template <typename T, typename M, typename V>
+struct replace_pointee_type {
+  using type = T;
+};
+
+// T is "M*": the result is "V*".
+template <typename M, typename V>
+struct replace_pointee_type<M*, M, V> {
+  using type = V*;
+};
+
+// T is "const M*": the result is "const V*".
+template <typename M, typename V>
+struct replace_pointee_type<const M*, M, V> {
+  using type = const V*;
+};
+
+/*!
+ * map_tuple_elements<M, V, std::tuple<As...>>
+ *
+ * From given tuple <As...>, form another tuple where for each A in As,
+ * if A contains a pointer to M, the pointer is replaced with a pointer
+ * to V, leaving other types unchanged.
+ */
+template <typename M, typename V, typename...>
+struct map_tuple_elements;
+
+template <typename M, typename V, typename... As>
+struct map_tuple_elements<M, V, std::tuple<As...>> {
+  using type = std::tuple<typename replace_pointee_type<As, M, V>::type...>;
+};
+
+/*!
+ * map_func_type<M, V, F>
+ *
+ * Given function type F = R(As...), form another function type by replacing
+ * each pointer to M with a pointer to V.
+ */
+template <typename M, typename V, typename F>
+struct map_func_type {
+  // Splits a function type R(As...) into its return type and a tuple of its
+  // parameter types.
+  template <typename...>
+  struct func_to_tuple;
+  template <typename R, typename... As>
+  struct func_to_tuple<R(As...)> {
+    using args = std::tuple<As...>;
+    using ret = R;
+  };
+
+  // Rebuilds a function type from a return type and a tuple of parameter
+  // types.
+  template <typename R, typename T>
+  struct tuple_to_func;
+  template <typename R, typename... As>
+  struct tuple_to_func<R, std::tuple<As...>> {
+    using func = R(As...);
+  };
+
+  using arg_tuple = typename func_to_tuple<F>::args;
+  using ret_type = typename func_to_tuple<F>::ret;
+  using mapped_args = typename map_tuple_elements<M, V, arg_tuple>::type;
+  using type = typename tuple_to_func<ret_type, mapped_args>::func;
+};
+}  // namespace
+
+class StubAPI {
+ public:
+  StubAPI();
+  ~StubAPI();
+
+ private:
+  // Create types for each remote function. For functions that take
+  // a pointer to tvm_hexagon_remote_buffer or tvm_hexagon_remote_nd_buffer,
+  // replace that pointer with pointer to void to make pointers to these
+  // two types identical in the function types created below.
+  // For example, int foo(tvm_hexagon_remote_buffer*) and
+  // int bar(tvm_hexagon_remote_nd_buffer*) should both have the same type.
+#define MAPTYPE(fn, ty) \
+  using fn##_t = typename map_func_type<ty, void, decltype(::fn)>::type;
+  MAPTYPE(tvm_hexagon_remote_load_library, tvm_hexagon_remote_buffer)
+  MAPTYPE(tvm_hexagon_remote_release_library, tvm_hexagon_remote_buffer)
+  MAPTYPE(tvm_hexagon_remote_get_symbol, tvm_hexagon_remote_buffer)
+  MAPTYPE(tvm_hexagon_remote_kernel, tvm_hexagon_remote_buffer)
+  MAPTYPE(tvm_hexagon_remote_close, tvm_hexagon_remote_buffer)
+  MAPTYPE(tvm_hexagon_remote_alloc_vtcm, tvm_hexagon_remote_buffer)
+  MAPTYPE(tvm_hexagon_remote_free_vtcm, tvm_hexagon_remote_buffer)
+  MAPTYPE(tvm_hexagon_remote_call_mmap64, tvm_hexagon_remote_buffer)
+
+  MAPTYPE(tvm_hexagon_remote_nd_load_library, tvm_hexagon_remote_nd_buffer)
+  MAPTYPE(tvm_hexagon_remote_nd_release_library, tvm_hexagon_remote_nd_buffer)
+  MAPTYPE(tvm_hexagon_remote_nd_get_symbol, tvm_hexagon_remote_nd_buffer)
+  MAPTYPE(tvm_hexagon_remote_nd_kernel, tvm_hexagon_remote_nd_buffer)
+  MAPTYPE(tvm_hexagon_remote_nd_close, tvm_hexagon_remote_buffer)
+  MAPTYPE(tvm_hexagon_remote_nd_call_mmap64, tvm_hexagon_remote_buffer)
+#undef MAPTYPE
+
+  // For remote functions whose prototypes differ significantly between
+  // the domain and non-domain versions, create the types directly.
+#define DECLTYPE(fn) using fn##_t = decltype(::fn);
+  DECLTYPE(tvm_hexagon_remote_open)
+  DECLTYPE(tvm_hexagon_remote_nd_open)
+
+  DECLTYPE(rpcmem_init)
+  DECLTYPE(rpcmem_deinit)
+  DECLTYPE(rpcmem_alloc)
+  DECLTYPE(rpcmem_free)
+  DECLTYPE(rpcmem_to_fd)
+#undef DECLTYPE
+
+ public:
+  // Calls func_d(handle, args...) when domains are enabled, and
+  // func_nd(args...) otherwise.
+  template <typename Fd, typename Fnd, typename... Ts>
+  int invoke(Fd func_d, Fnd func_nd, remote_handle64 handle,
+             Ts... args) const {
+    if (enable_domains_) {
+      return func_d(handle, args...);
+    }
+    return func_nd(args...);
+  }
+  // For functions that only exist in the domain library: calls
+  // func_d(handle, args...) when domains are enabled, and otherwise returns
+  // 0 without calling anything.
+  template <typename Fd, typename... Ts>
+  int invoke_d(Fd func_d, remote_handle64 handle, Ts... args) const {
+    if (enable_domains_) {
+      return func_d(handle, args...);
+    }
+    return 0;
+  }
+
+#define CONCAT_STR_FOR_REAL(a, b) a##b
+#define CONCAT_STR(a, b) CONCAT_STR_FOR_REAL(a, b)
+
+#define FUNC(name) CONCAT_STR(tvm_hexagon_remote_, name)
+#define FUNC_D(name) CONCAT_STR(tvm_hexagon_remote_, name)
+#define FUNC_ND(name) CONCAT_STR(tvm_hexagon_remote_nd_, name)
+#define PTRNAME(fn) CONCAT_STR(p, CONCAT_STR(fn, _))
+
+  // Defines the member tvm_hexagon_remote_<name>(handle, args...) that
+  // dispatches to the domain or the non-domain function pointer.
+#define DECLFUNC(name)                                                   \
+  template <typename... Ts>                                              \
+  int FUNC(name)(remote_handle64 handle, Ts... args) const {             \
+    return invoke(PTRNAME(FUNC_D(name)), PTRNAME(FUNC_ND(name)), handle, \
+                  args...);                                              \
+  }
+
+  // Same as DECLFUNC, for functions that have no non-domain counterpart.
+#define DECLFUNC_D(name)                                     \
+  template <typename... Ts>                                  \
+  int FUNC(name)(remote_handle64 handle, Ts... args) const { \
+    return invoke_d(PTRNAME(FUNC_D(name)), handle, args...); \
+  }
+
+  DECLFUNC(load_library)
+  DECLFUNC(release_library)
+  DECLFUNC(get_symbol)
+  DECLFUNC(kernel)
+  DECLFUNC(close)
+  DECLFUNC_D(alloc_vtcm)
+  DECLFUNC_D(free_vtcm)
+  DECLFUNC(call_mmap64)
+#undef DECLFUNC
+
+// Implementations provided here in case the target does not have these
+// in lib[ac]dsprpc.so.
+// Each accessor returns the resolved pointer as is; it may be null.
+#define DECLSFUNC(fn) \
+  fn##_t* fn##_ptr() const { return p##fn##_; }
+  DECLSFUNC(rpcmem_init)
+  DECLSFUNC(rpcmem_deinit)
+  DECLSFUNC(rpcmem_alloc)
+  DECLSFUNC(rpcmem_free)
+  DECLSFUNC(rpcmem_to_fd)
+#undef DECLSFUNC
+#undef DECLFUNC_D
+
+  // Opens the remote session. With domains enabled, `uri` and `handle` are
+  // forwarded to the domain function; otherwise the non-domain function is
+  // called without arguments and `*handle` is not written.
+  int tvm_hexagon_remote_open(const char* uri, remote_handle64* handle) const {
+    if (enable_domains_) {
+      return PTRNAME(tvm_hexagon_remote_open)(uri, handle);
+    }
+    return PTRNAME(tvm_hexagon_remote_nd_open)();
+  }
+
+  // Returns the shared instance.
+  static const StubAPI* Global();
+
+ private:
+  // Selects the domain (true) or non-domain (false) set of functions.
+  bool enable_domains_ = true;
+  // Handle returned by dlopen; null when no library is loaded.
+  void* lib_handle_ = nullptr;
+
+  // One pointer member per remote function, null until resolved.
+#define DECLPTR(fn) fn##_t* PTRNAME(fn) = nullptr
+  DECLPTR(tvm_hexagon_remote_load_library);
+  DECLPTR(tvm_hexagon_remote_release_library);
+  DECLPTR(tvm_hexagon_remote_get_symbol);
+  DECLPTR(tvm_hexagon_remote_kernel);
+  DECLPTR(tvm_hexagon_remote_open);
+  DECLPTR(tvm_hexagon_remote_close);
+  DECLPTR(tvm_hexagon_remote_alloc_vtcm);
+  DECLPTR(tvm_hexagon_remote_free_vtcm);
+  DECLPTR(tvm_hexagon_remote_call_mmap64);
+
+  DECLPTR(tvm_hexagon_remote_nd_load_library);
+  DECLPTR(tvm_hexagon_remote_nd_release_library);
+  DECLPTR(tvm_hexagon_remote_nd_get_symbol);
+  DECLPTR(tvm_hexagon_remote_nd_kernel);
+  DECLPTR(tvm_hexagon_remote_nd_open);
+  DECLPTR(tvm_hexagon_remote_nd_close);
+  DECLPTR(tvm_hexagon_remote_nd_call_mmap64);
+#undef DECLPTR
+
+// "System" functions.
+// The macro has no trailing semicolon; each use below supplies its own,
+// matching DECLPTR above.
+#define DECLSPTR(fn) fn##_t* p##fn##_ = nullptr
+  // Implementations provided here in case the target does not have these
+  // in lib[ac]dsprpc.so.
+  DECLSPTR(rpcmem_init);
+  DECLSPTR(rpcmem_deinit);
+  DECLSPTR(rpcmem_alloc);
+  DECLSPTR(rpcmem_free);
+  DECLSPTR(rpcmem_to_fd);
+#undef DECLSPTR
+
+#undef PTRNAME
+#undef FUNC_ND
+#undef FUNC_D
+#undef FUNC
+#undef CONCAT_STR
+#undef CONCAT_STR_FOR_REAL
+
+  // Looks up `sym` in the loaded library and returns it as the pointer type
+  // T, or nullptr if the lookup fails.
+  template <typename T>
+  T GetSymbol(const char* sym);
+};
+
+}  // namespace hexagon
+
+}  // namespace runtime
+}  // namespace tvm
+
+#endif  // __ANDROID__
+#endif  // TVM_RUNTIME_HEXAGON_TARGET_HEXAGON_STUBAPI_H_
diff --git a/src/runtime/hexagon/target/hexagon_target_log.h b/src/runtime/hexagon/target/hexagon_target_log.h
new file mode 100644
index 000000000000..ae09503cd35b
--- /dev/null
+++ b/src/runtime/hexagon/target/hexagon_target_log.h
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TVM_RUNTIME_HEXAGON_TARGET_HEXAGON_TARGET_LOG_H_
+#define TVM_RUNTIME_HEXAGON_TARGET_HEXAGON_TARGET_LOG_H_
+#ifdef __ANDROID__
+
+#include 
+
+// printf-style logging macros, one per Android log priority. Each forwards
+// its arguments to __android_log_print with the tag "TVM".
+#define TVM_LOGV(...) \
+  __android_log_print(ANDROID_LOG_VERBOSE, "TVM", ##__VA_ARGS__)
+#define TVM_LOGD(...) \
+  __android_log_print(ANDROID_LOG_DEBUG, "TVM", ##__VA_ARGS__)
+#define TVM_LOGI(...) \
+  __android_log_print(ANDROID_LOG_INFO, "TVM", ##__VA_ARGS__)
+#define TVM_LOGW(...) \
+  __android_log_print(ANDROID_LOG_WARN, "TVM", ##__VA_ARGS__)
+#define TVM_LOGE(...) \
+  __android_log_print(ANDROID_LOG_ERROR, "TVM", ##__VA_ARGS__)
+#define TVM_LOGF(...) \
+  __android_log_print(ANDROID_LOG_FATAL, "TVM", ##__VA_ARGS__)
+
+#endif  // __ANDROID__
+#endif  // TVM_RUNTIME_HEXAGON_TARGET_HEXAGON_TARGET_LOG_H_
diff --git a/src/target/opt/build_hexagon_off.cc b/src/target/opt/build_hexagon_off.cc
new file mode 100644
index 000000000000..ce06700222ae
--- /dev/null
+++ b/src/target/opt/build_hexagon_off.cc
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "../source/codegen_source_base.h"
+
+namespace tvm {
+namespace runtime {
+// Stub for builds without Hexagon runtime: warn and return a source module.
+Module HexagonModuleCreate(std::string data, std::string fmt,
+                           std::unordered_map<std::string, FunctionInfo> fmap,
+                           std::string asm_str, std::string obj_str,
+                           std::string ir_str, std::string bc_str,
+                           const std::set<std::string>& packed_c_abi) {
+  LOG(WARNING) << "Hexagon runtime is not enabled, return a source module...";
+  return codegen::DeviceSourceModuleCreate(data, fmt, fmap, "hex");
+}
+
+}  // namespace runtime
+}  // namespace tvm
diff --git a/src/target/target.cc b/src/target/target.cc
index ab2077db584c..8fb9cb6fc593 100644
--- a/src/target/target.cc
+++ b/src/target/target.cc
@@ -48,8 +48,8 @@ TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
 /*!
 * \brief Construct a Target node from the given name and options.
 * \param target_name The major target name. Should be one of
-* {"aocl", "aocl_sw_emu", "c", "cuda", "ext_dev", "hybrid", "llvm", "metal",
-*  "nvptx", "opencl", "opengl", "rocm", "sdaccel", "stackvm", "vulkan"}
+* {"aocl", "aocl_sw_emu", "c", "cuda", "ext_dev", "hexagon", "hybrid", "llvm",
+*  "metal", "nvptx", "opencl", "opengl", "rocm", "sdaccel", "stackvm", "vulkan"}
 * \param options Additional options appended to the target
 * \return The constructed Target
 */
@@ -136,6 +136,9 @@ Target CreateTarget(const std::string& target_name,
     t->device_type = kDLExtDev;
   } else if (target_name == "hybrid") {
     t->device_type = kDLCPU;
+  } else if (target_name == "hexagon") {
+    t->keys_array.push_back(tir::StringImmNode::make("hexagon"));
+    t->device_type = kDLHexagon;
   } else {
     LOG(ERROR) << "Unknown target name " << target_name;
     return target::stackvm();
@@ -336,6 +339,10 @@ Target stackvm(const std::vector& options) {
 Target ext_dev(const std::vector<std::string>& options) {
   return CreateTarget("ext_dev", options);
 }
+
+Target hexagon(const std::vector<std::string>& options) {
+  return CreateTarget("hexagon", options);
+}
 }  // namespace target
 
 BuildConfig BuildConfig::Create() {

From 6b542e046b05d1fde62d5507c109a62da61e334d Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Mon, 6 Apr 2020 12:30:59 -0500
Subject: [PATCH 02/12] Fix pylint complaints

---
 python/tvm/target/target.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py
index b56a4520cfd2..3852af0fe8e1 100644
--- a/python/tvm/target/target.py
+++ b/python/tvm/target/target.py
@@ -270,7 +270,7 @@ def hexagon(cpu_ver='v66', sim_args=None, hvx=128):
     valid_hex = ['v60', 'v62', 'v65', 'v66', 'v67', 'v67t']
     try:
         cpu_ver = cpu_ver[cpu_ver.index('v'):].lower()
-        assert(3 <= len(cpu_ver) <= 4)
+        assert 3 <= len(cpu_ver) <= 4
     except:
         msg = '{} is not a valid Hexagon version\nvalid versions include {}'
         raise ValueError(msg.format(cpu_ver, valid_hex)) from None
@@ -280,8 +280,8 @@ def hexagon(cpu_ver='v66', sim_args=None, hvx=128):
     # Target string
     def create_target(cpu_ver):
         target  = ' -target=hexagon'
-        mcpu    = ' -mcpu=hexagon'+cpu_ver
-        mattr   = ''
+        mcpu = ' -mcpu=hexagon' + cpu_ver
+        mattr = ''
         # HVX enable
         if hvx:
             mattr = ' -mattr=+hvx' + cpu_ver + ',+hvx-length' + str(hvx) + 'b'

From ca3f9d16714791f55b7d08a5aecb9c08175ee485 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Mon, 6 Apr 2020 12:38:20 -0500
Subject: [PATCH 03/12] Fix some more pylint complaints

---
 python/tvm/target/target.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py
index 3852af0fe8e1..886ea5cc029d 100644
--- a/python/tvm/target/target.py
+++ b/python/tvm/target/target.py
@@ -279,7 +279,7 @@ def hexagon(cpu_ver='v66', sim_args=None, hvx=128):
 
     # Target string
     def create_target(cpu_ver):
-        target  = ' -target=hexagon'
+        target = ' -target=hexagon'
         mcpu = ' -mcpu=hexagon' + cpu_ver
         mattr = ''
         # HVX enable

From 7fc718c88f7aece6ff6278b9ace5c481ca31ab38 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Mon, 6 Apr 2020 14:45:32 -0500
Subject: [PATCH 04/12] Add link to the Hexagon SDK website

---
 src/runtime/hexagon/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/runtime/hexagon/README.md b/src/runtime/hexagon/README.md
index 3b52b8321117..77ffa9ff194e 100644
--- a/src/runtime/hexagon/README.md
+++ b/src/runtime/hexagon/README.md
@@ -23,7 +23,7 @@ The Hexagon runtime is a part of the TVM runtime that facilitates communication
 
 The TVM runtime that contains Hexagon runtime is the one executing on host.  In either case, there will need to be a separate TVM runtime (i.e.  the `libtvm_runtime.so` library) compiled for execution on Hexagon.
 
-The prerequisite is to have Hexagon SDK installed, preferably version 3.5.0.
+The prerequisite is to have Hexagon SDK installed, preferably version 3.5.0 or later. The Hexagon SDK can be downloaded from https://developer.qualcomm.com/software/hexagon-dsp-sdk.
 
 ### Compiling TVM runtime for x86
 

From 7d7ca2ce5b5d80a067b5c49c5ae5b89e627b723f Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Tue, 7 Apr 2020 08:31:59 -0500
Subject: [PATCH 05/12] Extract VTCM marker into a common variable

---
 .../hexagon/target/hexagon_device_target.cc   | 24 +++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/src/runtime/hexagon/target/hexagon_device_target.cc b/src/runtime/hexagon/target/hexagon_device_target.cc
index 9d688c1b3d12..7557da6074b1 100644
--- a/src/runtime/hexagon/target/hexagon_device_target.cc
+++ b/src/runtime/hexagon/target/hexagon_device_target.cc
@@ -87,15 +87,18 @@ class HexagonTarget : public tvm::runtime::hexagon::Device {
   // Using void* pointers is ok, since DSP pointers will always fit
   // in apps's pointers, i.e. sizeof_dsp(void*) <= sizeof_apps(void*).
   std::map> dsp_to_apps_;
-  std::map> vtcm_addr_;
   remote_handle64 domain_channel_handle_ = AEE_EUNKNOWN;
   tvm_hexagon_remote_handle_t module_pointer_ = AEE_EUNKNOWN;
   uint64_t count_channel_open_ = 0;
   // Global lock, used for all critical sections. This can be refined
   // in the future.
   mutable std::mutex crit_section_;
+
+  static void* const vtcm_mark_;
 };
 
+void* const HexagonTarget::vtcm_mark_ = reinterpret_cast<void*>(~0);
+
 std::shared_ptr<Device> CreateHexagonTarget() {
   return std::make_shared<HexagonTarget>();
 }
@@ -243,6 +246,7 @@ void HexagonTarget::FreeMemoryBeforeChannelClose() {
     HexagonTarget::Free(dsp_addr);
   }
 }
+
 void* HexagonTarget::Alloc(unsigned size, unsigned align) {
   const DspRpcAPI* dsp_api = DspRpcAPI::Global();
   const StubAPI* stub_api = StubAPI::Global();
@@ -294,7 +298,7 @@ void HexagonTarget::Free(void* ptr) {
   const DspRpcAPI* dsp_api = DspRpcAPI::Global();
   const StubAPI* stub_api = StubAPI::Global();
   auto bb = GetAppsAddr(ptr, true);
-  if (bb.first == reinterpret_cast(~0)) {
+  if (bb.first == vtcm_mark_) {
     TVM_LOGD_HT("VTCM mapping found. dsp_addr=0x%p", ptr);
     RemoveAddrMapping(ptr);
     FreeVtcm(ptr);
@@ -326,7 +330,7 @@ void* HexagonTarget::AllocVtcm(unsigned size, unsigned align) {
   }
   void* dsp_addr = reinterpret_cast(dsp_va);
   TVM_LOGD_HT("Done vtcm alloc dsp:%p", dsp_addr);
-  AddAddrMapping(dsp_addr, reinterpret_cast(~0), size);
+  AddAddrMapping(dsp_addr, vtcm_mark_, size);
   return dsp_addr;
 }
 
@@ -349,8 +353,8 @@ void HexagonTarget::CopyDeviceToDevice(void* dst, const void* src,
 void HexagonTarget::CopyDeviceToHost(void* host_dst, const void* src,
                                      unsigned len) {
   auto aa = GetAppsAddr(src, false);
-  if (aa.first == reinterpret_cast(~0)) {
-    TVM_LOGD_HT("VTCM address. Copy operation not needed");
+  if (aa.first == vtcm_mark_) {
+    TVM_LOGE_HT("VTCM address. Copy operation not supported");
     return;
   }
   if (!aa.first) {
@@ -358,7 +362,7 @@ void HexagonTarget::CopyDeviceToHost(void* host_dst, const void* src,
     return;
   }
   if (aa.second < len) {
-    TVM_LOGE_HT(
+    TVM_LOGD_HT(
         "specified length:%u larger than buffer size:%zu, copy truncated", len,
         aa.second);
     len = aa.second;
@@ -371,8 +375,8 @@ void HexagonTarget::CopyDeviceToHost(void* host_dst, const void* src,
 void HexagonTarget::CopyHostToDevice(void* dst, const void* host_src,
                                      unsigned len) {
   auto aa = GetAppsAddr(dst, false);
-  if (aa.first == reinterpret_cast(~0)) {
-    TVM_LOGD_HT("VTCM address. Copy operation not needed");
+  if (aa.first == vtcm_mark_) {
+    TVM_LOGE_HT("VTCM address. Copy operation not supported");
     return;
   }
   if (!aa.first) {
@@ -380,7 +384,7 @@ void HexagonTarget::CopyHostToDevice(void* dst, const void* host_src,
     return;
   }
   if (aa.second < len) {
-    TVM_LOGE_HT(
+    TVM_LOGD_HT(
         "specified length:%u larger than buffer size:%zu, copy truncated", len,
         aa.second);
     len = aa.second;
@@ -476,7 +480,7 @@ void HexagonTarget::Call(void* func, uint32_t* scalar, unsigned scalar_num,
     for (unsigned i = 0; i != num; ++i) {
       void* ptr = reinterpret_cast(static_cast(inputs[i]));
       auto aa = GetAppsAddr(ptr, false);
-      if (aa.first == reinterpret_cast(~0)) {
+      if (aa.first == vtcm_mark_) {
         buffers[i].data = nullptr;
         buffers[i].dataLen = 0;
       } else if (aa.first) {

From 19927d4f7e86ca3ed20217594a55aef8375c80e7 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Tue, 7 Apr 2020 08:34:31 -0500
Subject: [PATCH 06/12] Implement device->device memory copy

---
 .../hexagon/target/hexagon_device_target.cc   | 27 ++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/runtime/hexagon/target/hexagon_device_target.cc b/src/runtime/hexagon/target/hexagon_device_target.cc
index 7557da6074b1..585b16fa7a5f 100644
--- a/src/runtime/hexagon/target/hexagon_device_target.cc
+++ b/src/runtime/hexagon/target/hexagon_device_target.cc
@@ -348,7 +348,32 @@ void HexagonTarget::FreeVtcm(void* ptr) {
 }
 
 void HexagonTarget::CopyDeviceToDevice(void* dst, const void* src,
-                                       unsigned len) {}
+                                       unsigned len) {
+  auto aa_src = GetAppsAddr(src, false);
+  auto aa_dst = GetAppsAddr(dst, false);
+  if (aa_src.first == vtcm_mark_ || aa_dst.first == vtcm_mark_) {
+    TVM_LOGE_HT("VTCM address. Copy operation not supported");
+    return;
+  }
+  if (!aa_src.first || !aa_dst.first) {
+    TVM_LOGE_HT("copy failed, dsp:%p -> dsp:%p, len:%u", src, dst, len);
+    return;
+  }
+  if (aa_src.second < len) {
+    TVM_LOGD_HT(
+        "specified length:%u larger than source buffer size:%zu, copy "
+        "truncated", len, aa_src.second);
+  }
+  if (aa_dst.second < len) {
+    TVM_LOGD_HT(
+        "specified length:%u larger than dest buffer size:%zu, copy "
+        "truncated", len, aa_dst.second);
+  }
+  len = std::min({size_t(len), aa_src.second, aa_dst.second});
+  TVM_LOGD_HT("copy, dsp:%p(apps:%p) -> dsp:%p(apps:%p), len:%u",
+              src, aa_src.first, dst, aa_dst.first, len);
+  std::memcpy(aa_dst.first, aa_src.first, len);
+}
 
 void HexagonTarget::CopyDeviceToHost(void* host_dst, const void* src,
                                      unsigned len) {

From 8bd81d130fbe20cc64cf6aeba47865cc968c8921 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Tue, 7 Apr 2020 10:19:03 -0500
Subject: [PATCH 07/12] Disable unsigned PDs by default

---
 src/runtime/hexagon/target/hexagon_device_target.cc | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/runtime/hexagon/target/hexagon_device_target.cc b/src/runtime/hexagon/target/hexagon_device_target.cc
index 585b16fa7a5f..00ca49ea9797 100644
--- a/src/runtime/hexagon/target/hexagon_device_target.cc
+++ b/src/runtime/hexagon/target/hexagon_device_target.cc
@@ -94,6 +94,9 @@ class HexagonTarget : public tvm::runtime::hexagon::Device {
   // in the future.
   mutable std::mutex crit_section_;
 
+  // Don't use unsigned PDs by default. Change this to "true" to enable.
+  static constexpr bool unsigned_pd = false;
+
   static void* const vtcm_mark_;
 };
 
@@ -253,7 +256,7 @@ void* HexagonTarget::Alloc(unsigned size, unsigned align) {
 
   // Opening the domain channel should be done once.
   crit_section_.lock();
-  int rc_oc = OpenDomainChannel(/*use_unsigned_pd*/ true);
+  int rc_oc = OpenDomainChannel(/*use_unsigned_pd*/ unsigned_pd);
   crit_section_.unlock();
   if (rc_oc != AEE_SUCCESS) {
     TVM_LOGE_HT("mem alloc failed: unable to open domain channel");
@@ -421,7 +424,7 @@ void HexagonTarget::CopyHostToDevice(void* dst, const void* host_src,
 
 void* HexagonTarget::Load(const std::string& data, const std::string& fmt) {
   crit_section_.lock();
-  int rc_oc = OpenDomainChannel(/*use_unsigned_pd*/ true);
+  int rc_oc = OpenDomainChannel(/*use_unsigned_pd*/ unsigned_pd);
   crit_section_.unlock();
   if (rc_oc != AEE_SUCCESS) {
     TVM_LOGE_HT("loading of %s failed: unable to open domain channel",

From e6b623d339dbeef57a04bedf9bc13cf78dff3740 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Wed, 8 Apr 2020 08:44:39 -0500
Subject: [PATCH 08/12] Ensure that --hvx_length is present in sim_args if HVX
 is enabled

---
 python/tvm/target/target.py                   | 27 ++++++++++++-------
 src/runtime/hexagon/sim/hexagon_device_sim.cc |  3 ---
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py
index 886ea5cc029d..c277dc7bc4bc 100644
--- a/python/tvm/target/target.py
+++ b/python/tvm/target/target.py
@@ -289,8 +289,23 @@ def create_target(cpu_ver):
 
     # Simulator string
     def create_sim(cpu_ver, sim_args):
+        def validate_hvx_length(codegen_hvx, sim_args):
+            if sim_args and '--hvx_length' in sim_args:
+                # If --hvx_length was specified, check HVX length of sim
+                # vs codegen
+                i = sim_args.index('hvx_length') + len('hvx_length') + 1
+                sim_hvx = sim_args[i:i+3]
+                if sim_hvx != str(hvx):
+                    print('WARNING: sim hvx {} and codegen hvx {} mismatch!' \
+                          .format(sim_hvx, hvx))
+            elif hvx != 0:
+                # If --hvx_length was not given, add it if HVX is enabled
+                sim_args = sim_args + ' ' if isinstance(sim_args, str) else ''
+                sim_args += '--hvx_length ' + str(hvx)
+            return sim_args or ''
+
         if not sim_args:
-            return cpu_ver
+            return cpu_ver + ' ' + validate_hvx_length(hvx, sim_args)
 
         sim_cpu = cpu_ver + ' '
 
@@ -319,15 +334,7 @@ def create_sim(cpu_ver, sim_args):
                        cpu_attr['rev'] + ' ' +     \
                        cpu_attr['pre'] + cpu_attr['post']
 
-        # Check HVX length of sim vs codegen
-        if "--hvx_length" in sim_args:
-            i = sim_args.index('hvx_length') + len('hvx_length') + 1
-            sim_hvx = sim_args[i:i+3]
-            if sim_hvx != str(hvx):
-                print('WARNING: sim hvx {} and codegen hvx {} do not match!' \
-                      .format(sim_hvx, hvx))
-
-        return sim_cpu + str(sim_args)
+        return sim_cpu + ' ' + validate_hvx_length(hvx, sim_args)
 
     # Sim args
     os.environ['HEXAGON_SIM_ARGS'] = create_sim(cpu_ver, sim_args)
diff --git a/src/runtime/hexagon/sim/hexagon_device_sim.cc b/src/runtime/hexagon/sim/hexagon_device_sim.cc
index 63eed5eabd9c..b58377baa947 100644
--- a/src/runtime/hexagon/sim/hexagon_device_sim.cc
+++ b/src/runtime/hexagon/sim/hexagon_device_sim.cc
@@ -585,9 +585,6 @@ HexagonSimulator::HexagonSimulator(bool enable_queuing) {
   sim_dev_args_ = detail::non_const_str(app_args);
   CHECKED_CALL(ConfigureAppCommandLine, sim_dev_args_.size(), sim_dev_args_);
 
-  LOG(INFO) << "HexagonSimulator: ConfigureHVXLength: 128";
-  CHECKED_CALL(ConfigureHVXLength, 128);
-
   Configure(sim_args);
 
   CHECKED_CALL(EndOfConfiguration);

From 701b2f1f874c37e43b4baf93a4b746f4d88169e1 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Wed, 8 Apr 2020 08:53:41 -0500
Subject: [PATCH 09/12] Remove the line about clang from README.md

Apparently things work with libstdc++.
---
 src/runtime/hexagon/README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/runtime/hexagon/README.md b/src/runtime/hexagon/README.md
index 77ffa9ff194e..0d8d81bfd60f 100644
--- a/src/runtime/hexagon/README.md
+++ b/src/runtime/hexagon/README.md
@@ -36,8 +36,6 @@ USE_HEXAGON_DEVICE=sim
 USE_HEXAGON_SDK=/path/to/sdk
 ```
 
-Set the C/C++ compiler to `clang`, and pass `-DCMAKE_CXX_FLAGS='-stdlib=libc++'` to the cmake command.
-
 You can then build the entire TVM with the usual command (e.g. `make`).
 
 ### Compiling TVM runtime for Android

From 8e879f354b5c401f67f5544b61de5d8706deeabd Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Wed, 8 Apr 2020 09:07:07 -0500
Subject: [PATCH 10/12] Mention to set USE_RPC=OFF when building
 libtvm_runtime.so for Hexagon

---
 src/runtime/hexagon/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/runtime/hexagon/README.md b/src/runtime/hexagon/README.md
index 0d8d81bfd60f..79b6dc149c52 100644
--- a/src/runtime/hexagon/README.md
+++ b/src/runtime/hexagon/README.md
@@ -61,6 +61,7 @@ The TVM runtime executing on Hexagon does not need to have support for Hexagon d
 
 When configuring TVM (cmake), set the following variables:
 ```
+USE_RPC=OFF
 USE_LLVM=OFF
 USE_HEXAGON_DEVICE=OFF
 USE_HEXAGON_SDK=/path/to/sdk

From 52de89ab84819c3f57865c6a7143e7ef30ea62de Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Wed, 8 Apr 2020 09:17:05 -0500
Subject: [PATCH 11/12] Remember to use codegen_hvx in validate_hvx_length

---
 python/tvm/target/target.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py
index c277dc7bc4bc..a83ea0c2ab05 100644
--- a/python/tvm/target/target.py
+++ b/python/tvm/target/target.py
@@ -295,13 +295,13 @@ def validate_hvx_length(codegen_hvx, sim_args):
                 # vs codegen
                 i = sim_args.index('hvx_length') + len('hvx_length') + 1
                 sim_hvx = sim_args[i:i+3]
-                if sim_hvx != str(hvx):
+                if sim_hvx != str(codegen_hvx):
                     print('WARNING: sim hvx {} and codegen hvx {} mismatch!' \
-                          .format(sim_hvx, hvx))
-            elif hvx != 0:
+                          .format(sim_hvx, codegen_hvx))
+            elif codegen_hvx != 0:
                 # If --hvx_length was not given, add it if HVX is enabled
                 sim_args = sim_args + ' ' if isinstance(sim_args, str) else ''
-                sim_args += '--hvx_length ' + str(hvx)
+                sim_args += '--hvx_length ' + str(codegen_hvx)
             return sim_args or ''
 
         if not sim_args:

From 2296b94babfd0b9f0ced0fd0d0d8646f4efd7c35 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Thu, 9 Apr 2020 08:12:34 -0500
Subject: [PATCH 12/12] Add a line about minimum version of LLVM

---
 src/runtime/hexagon/README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/runtime/hexagon/README.md b/src/runtime/hexagon/README.md
index 79b6dc149c52..6641637a0c7d 100644
--- a/src/runtime/hexagon/README.md
+++ b/src/runtime/hexagon/README.md
@@ -25,6 +25,8 @@ The TVM runtime that contains Hexagon runtime is the one executing on host.  In
 
 The prerequisite is to have Hexagon SDK installed, preferably version 3.5.0 or later. The Hexagon SDK can be downloaded from https://developer.qualcomm.com/software/hexagon-dsp-sdk.
 
+It is also recommended to use as recent a version of LLVM as possible; version 7.0.0 is the minimum (based on community feedback).
+
 ### Compiling TVM runtime for x86
 
 This will use Hexagon simulator, which is provided in the Hexagon SDK.