Fix docs

Deelvin · Nov 1, 2022 · eb1e257 · eb1e257
1 parent 80463ea
commit eb1e257
Show file tree

Hide file tree

Showing 3 changed files with 122 additions and 58 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -91,6 +91,7 @@ def git_describe_version(original_version):
 extensions = [
     "sphinx.ext.autodoc",
     "sphinx.ext.autosummary",
+    "sphinx.ext.autosectionlabel",
     "sphinx.ext.intersphinx",
     "sphinx.ext.napoleon",
     "sphinx.ext.mathjax",

diff --git a/docs/how_to/deploy/adreno.rst b/docs/how_to/deploy/adreno.rst
@@ -15,8 +15,8 @@ deliver high-performance graphics and a rich user experience with low
 power consumption.
 
 This guide will demonstrate :ref:`the benefits of using textures with Adreno<Advantages of the Textures>`,
-:ref:`how to build TVM with OpenCL-SDK <Building TVM for Adreno>` (needed by Adreno devices) and TVM RPC
-enabled. It will also provide :ref:`example code <Build and deploy model for Adreno>` to better understand the differences with compiling and deploying models
+:ref:`how to build TVM with OpenCL-SDK<Building TVM for Adreno>` (needed by Adreno devices) and TVM RPC
+enabled. It will also provide :ref:`example code<Build and deploy model for Adreno>` to better understand the differences with compiling and deploying models
 on Adreno devices.
 
 Advantages of the Textures
@@ -71,9 +71,11 @@ with Android TVM RPC, refer to this guide: `Deploy the Pretrained Model on Andro
 For us to begin with, Android NDK, Android Debug Bridge and OpenCL-SDK must
 be installed and Android part of TVM must be builded.
 
-Read documentation about *Android NDK installation* here: https://developer.android.com/ndk \
+Read documentation about *Android NDK installation* here: https://developer.android.com/ndk
+
 To get access to adb tools you can see *Android Debug Bridge installation* here:
-https://developer.android.com/studio/command-line/adb \  
+https://developer.android.com/studio/command-line/adb
+
 For *OpenCL-SDK installation* please refer to official github repository: https://github.com/KhronosGroup/OpenCL-SDK.git
 
 You can also build the android part of TVM locally. From the root
@@ -102,8 +104,9 @@ For the complete step-py-step process of compiling and deploying models on
 Adreno, including selection of precision, running the inference of the
 model, getting the predictions, and measuring the performance please refer to this tutorial: `How To Deploy model on Adreno <https://tvm.apache.org/docs/how_to/deploy_models/deploy_model_on_adreno.html>`_
 
-| |Android deployment pipeline|
-| *Fig.2 Deployment pipeline on Adreno devices*
+|Android deployment pipeline|
+
+*Fig.2 Deployment pipeline on Adreno devices*
 
 Adreno target
 ~~~~~~~~~~~~~
@@ -183,15 +186,61 @@ The kernels generated this way is actually working with 2d arrays, leveraging te
 
 Precisions
 ~~~~~~~~~~
+The right choice of precision for a specific task can greatly increase the efficiency of the solution,
+shifting the initial balance of precision and speed to the side that is a priority for the problem.
+
+We can choose from *float16*, *float16_acc32* (Mixed Precision), *float32* (standard).
+
+**Float16**
+
+To leverage the GPU hardware capabilities and utilize the benefits of half precision computation and memory management,
+we can convert the original model with floating point operations to a model operating with half precision.
+Choosing lower precision will positively affect the performance of the model, but it may also decrease the accuracy of the model.
+To do the conversion you need to write a simple conversion function and specify the *dtype* value of "float16" before calling the function:
+
+.. code:: python
+
+   def  convert_to_dtype(mod, dtype):
+      # downcast to float16
+      if  dtype == "float16":
+         global  conv2d_acc
+         conv2d_acc = "float16"
+         from  tvm.ir  import  IRModule
+         mod = IRModule.from_expr(mod)
+         seq = tvm.transform.Sequential(
+            [
+                  relay.transform.InferType(),
+            ]
+         )
+         with  tvm.transform.PassContext(opt_level=3):
+            mod = seq(mod)
+      return  mod
+
+   dtype="float16"
+   mod = convert_to_dtype(mod["main"], dtype)
+
+We can then compile our model in any convenient way:
+
+.. code:: python
+
+   with  tvm.transform.PassContext(opt_level=3):
+       lib = relay.build(
+           mod, target_host=target_host, target=target, params=params
+       )
+
+**float16_acc32 (Mixed Precision)**
 
-We can also set different precision, choosing from *float16*,
-*float16_acc32* (Mixed Precision), *float32*. Choosing lower precision may positively
-affect the performance of the model, but it may also have a decrease in the accuracy of the model.
-In some tasks we may sacrifice accuracy in favor of speed, and in some tasks we may prefer mixed precision,
-obtaining some kind of balance.
+The ToMixedPrecision pass traverses the network and splits it into clusters of ops dealing with float or float16 data types.
+The clusters are defined by three types of operations:
+
+* Operations that are always converted to the float16 data type
+* Operations that can be converted if they follow a converted cluster
+* Operations that are never converted to the float16 data type
+
+This list is defined in the ToMixedPrecision implementation here
+`relay/transform/mixed_precision.py <https://github.com/apache/tvm/blob/main/python/tvm/relay/transform/mixed_precision.py#L34>`_
+and can be overridden by the user.
 
-First of all, to be able to convert precisions in general, we need to
-register conversion to mixed precision
+In some cases we want the accumulation type to have more bits than the input data type.
+This is supported, for example, for conv2d and dense operations. To override the accumulation type you need to register
+a function with the @register_mixed_precision_conversion decorator to modify the parameters of the ToMixedPrecision conversion:
 
 .. code:: python
 
@@ -222,9 +271,9 @@ register conversion to mixed precision
            mixed_precision_type,
        ]
 
-We then need to create a Relay graph from desired model in any convinient way
-and obtain **mod** (which is IR representation of the model), after which we can convert it to
-required **dtype** and then assemble our model sequentialy
+Now we need to modify the conversion function by adding some logical "forks" and a ToMixedPrecision() call,
+then create a Relay graph from the desired model in any convenient way and obtain **mod** (which is the IR representation of the model),
+after which we can convert it to the required **dtype** and then assemble our model sequentially:
 
 .. code:: python
 
@@ -262,5 +311,5 @@ From this point we can compile our model as normal
            mod, target_host=target_host, target=target, params=params
        )
 
-.. |High-level overview of the Adreno A5x architecture for OpenCL| image:: images/architecture.png
-.. |Android deployment pipeline| image:: images/deployment_pipeline.jpg
+.. |High-level overview of the Adreno A5x architecture for OpenCL| image:: images/adreno_architecture.png
+.. |Android deployment pipeline| image:: images/android_deployment_pipeline.jpg
diff --git a/gallery/how_to/deploy_models/deploy_model_on_adreno.py b/gallery/how_to/deploy_models/deploy_model_on_adreno.py
@@ -3,15 +3,17 @@
 =======================================
 **Author**: Daniil Barinov
 
-This article is a step-by-step tutorial to deploy pretrained MXNet VGG16 model on Adreno (on different precisions).
+This article is a step-by-step tutorial to deploy a pretrained PyTorch ResNet-18 model on Adreno (on different precisions).
 
-For us to begin with, MXNet package must be installed.
+For us to begin with, PyTorch must be installed.
+TorchVision is also required since we will be using it as our model zoo.
 
 A quick solution is to install it via pip:
 
 .. code-block:: bash
 
-  pip install mxnet
+  pip install torch
+  pip install torchvision
 
 Besides that, you should have TVM builded for Android.
 See the following instructions on how to build it.
@@ -94,25 +96,62 @@
 #    android      1      1     0
 #    ----------------------------------
 
+######################################################################
+# Load a test image
+# -----------------
+# As an example we will use the classic cat image from ImageNet
+
+from PIL import Image
+from tvm.contrib.download import download_testdata
+from matplotlib import pyplot as plt
+import numpy as np
+
+img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
+img_path = download_testdata(img_url, "cat.png", module="data")
+img = Image.open(img_path).resize((224, 224))
+plt.imshow(img)
+plt.show()
+
+# Preprocess the image and convert to tensor
+from torchvision import transforms
+
+my_preprocess = transforms.Compose(
+    [
+        transforms.Resize(256),
+        transforms.CenterCrop(224),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ]
+)
+img = my_preprocess(img)
+img = np.expand_dims(img, 0)
 
 ######################################################################
-# Load pretrained MXNet model
+# Load pretrained PyTorch model
 # ---------------------------
-# Create a Relay graph from a MXNet VGG16 model
+# Create a Relay graph from a PyTorch ResNet-18 model
 import os
-import numpy as np
-import mxnet.gluon as gluon
+import torch
+import torchvision
 import tvm
+from tvm import te
 from tvm import relay, rpc
 from tvm.contrib import utils, ndk
 from tvm.contrib import graph_executor
 
-name = "vgg16"
-model = gluon.model_zoo.vision.get_model(name, pretrained=True)
-input_name = "data"
-input_shape = (1, 3, 224, 224)
-shape_dict = {input_name: input_shape}
-mod, params = relay.frontend.from_mxnet(model, shape_dict)
+model_name = "resnet18"
+model = getattr(torchvision.models, model_name)(pretrained=True)
+model = model.eval()
+
+# We grab the TorchScripted model via tracing
+input_shape = [1, 3, 224, 224]
+input_data = torch.randn(input_shape)
+scripted_model = torch.jit.trace(model, input_data).eval()
+
+# Input name can be arbitrary
+input_name = "input0"
+shape_list = [(input_name, img.shape)]
+mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
 
 ######################################################################
 # Precisions
@@ -172,42 +211,17 @@ def  convert_to_dtype(mod, dtype):
 # needed to convert to FP16 precision. 
 # You can also use "float16" or "float32" precisions as other dtype options.
 
-######################################################################
-# Load a test image
-# -----------------
-# As an example we would use classical cat image from ImageNet
-
-from  PIL  import  Image
-from  tvm.contrib.download  import  download_testdata
-from matplotlib import pyplot as plt
-
-img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
-img_path = download_testdata(img_url, "cat.png", module="data")
-img = Image.open(img_path)
-
-plt.imshow(img)
-plt.show()
-
-# Image preprocessing:
-
-img = img.resize(shape_dict[input_name][2:])
-img = np.array(img) - np.array([123.0, 117.0, 104.0])
-img /= np.array([58.395, 57.12, 57.375])
-img = img.transpose((2, 0, 1))
-img = img[np.newaxis, :]
-
 ######################################################################
 # Compile the model with relay
 # ----------------------------
 # Specify Adreno target before compiling to generate texture 
 # leveraging kernels and get all the benefits of textures 
 
-target="opencl -device=adreno"
-target_host="llvm -mtriple=arm64-linux-android"
+target = tvm.target.Target("opencl -device=adreno", host="llvm -mtriple=arm64-linux-android")
 
 with  tvm.transform.PassContext(opt_level=3):
 	lib = relay.build(
-		mod, target_host=target_host, target=target, params=params
+		mod, target=target, params=params
 	)
 
 ######################################################################