[Marvell BYOC]: Marvell AI Accelerator Integration - Phase 1 (#16570)

apache · Feb 16, 2024 · 5645c52 · 5645c52
1 parent c7e3510
commit 5645c52
Show file tree

Hide file tree

Showing 23 changed files with 3,437 additions and 0 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -128,6 +128,7 @@ tvm_option(USE_CLML_GRAPH_EXECUTOR "Build with CLML graph runtime" OFF)
 tvm_option(USE_UMA "Build with UMA support" OFF)
 tvm_option(USE_VERILATOR "Build with Verilator support" OFF)
 tvm_option(USE_MSC "Enable Multi-System Compiler" OFF)
+tvm_option(USE_MRVL "Build with MRVL TVM support" OFF)
 
 # include directories
 include_directories(${CMAKE_INCLUDE_PATH})
@@ -581,6 +582,7 @@ include(cmake/modules/contrib/vllm.cmake)
 include(cmake/modules/Git.cmake)
 include(cmake/modules/LibInfo.cmake)
 include(cmake/modules/RustExt.cmake)
+include(cmake/modules/contrib/Mrvl.cmake)
 
 set(LIBINFO_FILE ${CMAKE_CURRENT_LIST_DIR}/src/support/libinfo.cc)
 add_lib_info(${LIBINFO_FILE})

diff --git a/cmake/config.cmake b/cmake/config.cmake
@@ -358,6 +358,9 @@ set(USE_HEXAGON_RPC OFF)
 # Valid values are v65, v66, v68, v69, v73.
 set(USE_HEXAGON_ARCH "v68")
 
+# Whether to use MRVL codegen
+set(USE_MRVL OFF)
+
 # Whether to use QHL library
 set(USE_HEXAGON_QHL OFF)
 

diff --git a/cmake/modules/LibInfo.cmake b/cmake/modules/LibInfo.cmake
@@ -99,6 +99,7 @@ function(add_lib_info src_file)
     TVM_INFO_USE_MICRO="${USE_MICRO}"
     TVM_INFO_USE_MIOPEN="${USE_MIOPEN}"
     TVM_INFO_USE_MKL="${USE_MKL}"
+    TVM_INFO_USE_MRVL="${USE_MRVL}"
     TVM_INFO_USE_MSVC_MT="${USE_MSVC_MT}"
     TVM_INFO_USE_NNPACK="${USE_NNPACK}"
     TVM_INFO_USE_OPENCL="${USE_OPENCL}"

diff --git a/cmake/modules/contrib/Mrvl.cmake b/cmake/modules/contrib/Mrvl.cmake
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+include(ExternalProject)
+if(USE_MRVL)
+  # Mrvl module: sources are listed explicitly so a missing file fails loudly.
+  message(STATUS "Build with Mrvl support")
+  set(RUNTIME_MRVL_SRCS
+    "${CMAKE_CURRENT_SOURCE_DIR}/src/runtime/contrib/mrvl/mrvl_runtime.cc"
+  )
+  list(APPEND RUNTIME_SRCS ${RUNTIME_MRVL_SRCS})
+  set(COMPILER_MRVL_SRCS
+    "${CMAKE_CURRENT_SOURCE_DIR}/src/relay/backend/contrib/mrvl/codegen.cc"
+    "${CMAKE_CURRENT_SOURCE_DIR}/src/relay/backend/contrib/mrvl/compiler_attr.cc"
+  )
+  list(APPEND COMPILER_SRCS ${COMPILER_MRVL_SRCS})
+endif()
diff --git a/docker/Dockerfile.demo_mrvl b/docker/Dockerfile.demo_mrvl
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# prebuilt ci-cpu image
+FROM tlcpack/ci-cpu:20230604-060130-0af9ff90e
diff --git a/docs/how_to/deploy/index.rst b/docs/how_to/deploy/index.rst
@@ -176,6 +176,7 @@ target device without relying on RPC. See the following resources on how to do s
    tensorrt
    vitis_ai
    bnns
+   mrvl
 
 Additional Deployment How-Tos
 -----------------------------

diff --git a/docs/how_to/deploy/mrvl.rst b/docs/how_to/deploy/mrvl.rst
@@ -0,0 +1,235 @@
+..  Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+..    http://www.apache.org/licenses/LICENSE-2.0
+
+..  Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+
+Marvell Machine Learning Integration
+====================================
+
+1. Introduction
+---------------
+Marvell(R) supports a family of high performance Data Processing
+Units (DPUs) with integrated compute, high speed I/O and workload
+accelerators. These workload accelerators include Marvell's
+Machine Learning Inference Processor (MLIP), a highly optimized,
+integrated inference engine.
+
+TVM supports Marvell's MLIP using the "mrvl" library. This partitions and
+compiles supported operations for accelerated execution on MLIP, or LLVM
+for general compute.
+
+For runtime, the library supports native execution on MLIP hardware
+as well as Marvell's ML simulator (mlModel).
+
+The library supports Marvell's Octeon family of processors with ML accelerators.
+
+This guide demonstrates building TVM with codegen and
+runtime enabled. It also provides example code to compile and run
+models using 'mrvl' runtime.
+
+2. Building TVM with mrvl support
+---------------------------------
+
+2.1 Clone TVM repo
+-------------------
+
+Refer to the following TVM documentation for cloning TVM
+https://tvm.apache.org/docs/install/from_source.html
+
+2.2 Build and start the TVM - mrvl docker container
+----------------------------------------------------
+
+.. code:: bash
+
+    ./docker/build.sh demo_mrvl bash                              # Build the docker container
+    ./docker/bash.sh tvm.demo_mrvl --env PYTHONPATH=$PWD/python   # Load the docker image
+
+
+3. Build TVM inside the docker container with mrvl (inside tvm directory)
+-------------------------------------------------------------------------
+
+.. code:: bash
+
+      ./tests/scripts/task_config_build_mrvl.sh build
+      cd build
+      cmake ..
+      make -j$(nproc)   # nproc = 4/8/..  (Number of Parallel jobs)
+
+4. Compiling a model using TVMC command line
+--------------------------------------------
+Models can be compiled and run for mrvl target using TVMC
+which is optimized for performance.
+
+Refer to the following TVMC documentation, for tvmc generic options.
+https://tvm.apache.org/docs/tutorial/tvmc_command_line_driver.html
+
+Additional mrvl-specific options may be added as attributes if
+necessary. The advanced usage is described in this document below.
+
+4.1 TVMC Compilation Flow for a model
+-------------------------------------
+
+Refer to the following TVM documentation, for compilation flow
+https://tvm.apache.org/docs/arch/index.html#example-compilation-flow
+
+
+4.2. TVMC - Command line option(s): Syntax for mrvl target
+----------------------------------------------------------
+
+Compiling an ONNX model using tvmc for the mrvl target.
+
+**Syntax:**
+
+.. code:: bash
+
+    python3 -m tvm.driver.tvmc compile --target="mrvl, llvm"
+        --target-llvm-<options>
+        --target-mrvl-<options>
+        --<tvm-generic-options>
+        model_file.onnx
+
+Following is an example TVMC Compile command for an ARMv9 core and
+integrated MLIP cn10ka processor, using only 4 tiles in the block.
+
+**Example:**
+
+.. code:: bash
+
+    python3 -m tvm.driver.tvmc compile --target="mrvl, llvm" \
+        --target-llvm-mtriple=aarch64-linux-gnu --target-llvm-mcpu=neoverse-n2 \
+        --target-mrvl-num_tiles=4 \
+        --cross-compiler aarch64-linux-gnu-gcc \
+        --output model.tar \
+        mnist-12.onnx
+
+
+4.3. TVMC Compiler: mrvl specific Command Line Options
+------------------------------------------------------
+
+.. code:: bash
+
+  --target-mrvl-mcpu
+  --target-mrvl-num_tiles
+  --target-mrvl-mattr
+
+**Description of mrvl options**
+
+* mcpu:
+    The CPU class of Marvell(R) ML Inference Processor;
+    possible values = {cn10ka, cnf10kb}; defaults to cn10ka
+
+* num_tiles:
+    Maximum number of tiles that may be used, possible values = {1,2,4,8}, defaults to 8
+
+* mattr:
+    Attributes for mrvl; possible values = {quantize, wb_pin_ocm}
+
+    mattr specifies the data type, code generation options and optimizations.
+
+    *The supported attributes are:*
+
+    **1. quantize**
+
+    Specify the data type. Possible values = {fp16, int8}.
+    Default is fp16; int8 is WIP and full support will be added in a future PR.
+
+    **2. wb_pin_ocm**
+
+    Optimize runtime by preloading a model's weights and bias into
+    the on chip memory. Possible values = {0, 1}. Default is 0 (no preload)
+
+5. Compilation - Generating model partitions
+--------------------------------------------
+
+In the TVMC mrvl flow, the model is partitioned into Marvell and LLVM regions.
+Building each partitioned Marvell subgraph generates serialized nodes.json and
+const.json. Partitioned nodes.json is the representation of the model graph which is
+suitable for the Marvell mmlc compiler. The mmlc compiler is distributed separately via CDK.
+
+**Model Partition**
+
+.. code:: bash
+
+    python3 -m tvm.driver.tvmc compile --target="mrvl, llvm \
+    -mtriple=aarch64-linux-gnu -mcpu=neoverse-n2" \
+    --cross-compiler aarch64-linux-gnu-gcc \
+    --target-mrvl-num_tiles=4 --output model.tar model.onnx
+
+
+6. Compiling a model using Python APIs
+--------------------------------------
+
+In addition to using TVMC, models can also be compiled and run using
+the TVM Python API. Below is an example to compile the MNIST model. Support
+for running the model will be part of the next PR by mrvl.
+
+**Download MNIST model from the web**
+
+.. code:: bash
+
+    cd $HOME
+    wget https://github.com/onnx/models/raw/main/validated/vision/classification/mnist/model/mnist-12.onnx
+
+**Import the TVM and other dependent modules**
+
+.. code:: python
+
+    import tvm, onnx, os
+    import numpy as np
+    import tvm.relay as relay
+    from tvm.relay.op.contrib.mrvl import partition_for_mrvl
+    from tvm.relay.build_module import build
+    from keras.datasets import mnist
+
+**Load model onnx file**
+
+.. code:: python
+
+    onnx_model = onnx.load("mnist-12.onnx")
+
+**Create a Relay graph from MNIST model**
+
+.. code:: python
+
+    shape_dict = {'Input3' : (1,1,28,28)}
+    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
+
+**Define option dictionary and Partition the Model**
+
+Annotate and partition the graph for mrvl. All operations which are supported
+by mrvl will be marked and offloaded to the mrvl hardware accelerator. The rest of the
+operations will go through the regular LLVM compilation and code generation for ARM.
+
+.. code:: python
+
+    tvm_target = "llvm"
+
+    option_dict = {'num_tiles': 4}
+
+    mod = partition_for_mrvl(mod, params, **option_dict)
+
+**Build the Relay Graph**
+
+Build the Relay graph, using the new module returned by partition_for_mrvl.
+The target must always be an LLVM (ARM) target. ``partition_for_mrvl`` will
+pass the options from dictionary into the config parameters needed by the
+compiler backend, so there is no need to modify it - just pass it along
+to the PassContext so the values can be read during compilation.
+
+.. code:: python
+
+    with tvm.transform.PassContext(opt_level=3, config={"relay.ext.mrvl.options" : option_dict}):
+            model_lib = relay.build(mod, tvm_target, params=params)