From c5efdb28f53e151a97dfb2951881dcff4e8044cf Mon Sep 17 00:00:00 2001
From: "q.yao" <yaoqian@sensetime.com>
Date: Mon, 19 Apr 2021 21:01:39 +0800
Subject: [PATCH 1/9] add onnxruntime test tool, update pytorch2onnx to support
 slice export

---
 tools/ort_test.py     | 200 ++++++++++++++++++++++++++++++++++++++++++
 tools/pytorch2onnx.py |  52 +++++++----
 2 files changed, 233 insertions(+), 19 deletions(-)
 create mode 100644 tools/ort_test.py

diff --git a/tools/ort_test.py b/tools/ort_test.py
new file mode 100644
index 0000000000..580e7175e1
--- /dev/null
+++ b/tools/ort_test.py
@@ -0,0 +1,200 @@
+import argparse
+import mmcv
+import numpy as np
+import onnxruntime as ort
+import os
+import os.path as osp
+import torch
+import warnings
+from mmcv.parallel import MMDataParallel
+from mmcv.runner import get_dist_info
+from mmcv.utils import DictAction
+
+from mmseg.apis import single_gpu_test
+from mmseg.datasets import build_dataloader, build_dataset
+from mmseg.models.segmentors.base import BaseSegmentor
+
+
+class ONNXRuntimeDetector(BaseSegmentor):
+
+    def __init__(self, onnx_file, cfg, device_id=None):
+        super(ONNXRuntimeDetector, self).__init__()
+        # get the custom op path
+        ort_custom_op_path = ''
+        try:
+            from mmcv.ops import get_onnxruntime_op_path
+            ort_custom_op_path = get_onnxruntime_op_path()
+        except (ImportError, ModuleNotFoundError):
+            warnings.warn('If input model has custom op from mmcv, \
+                you may have to build mmcv with ONNXRuntime from source.')
+        session_options = ort.SessionOptions()
+        # register custom op for onnxruntime
+        if osp.exists(ort_custom_op_path):
+            session_options.register_custom_ops_library(ort_custom_op_path)
+        sess = ort.InferenceSession(onnx_file, session_options)
+        if device_id is not None:
+            option = {'device_id': device_id, 'cuda_mem_limit': 1 << 30}
+            sess.set_providers(
+                ['CUDAExecutionProvider', 'CPUExecutionProvider'],
+                [option, {}])
+        else:
+            sess.set_providers(['CPUExecutionProvider'])
+
+        self.sess = sess
+        self.device_id = device_id
+        self.io_binding = sess.io_binding()
+        self.output_names = [_.name for _ in sess.get_outputs()]
+        self.cfg = cfg
+        self.test_mode = cfg.model.test_cfg.mode
+
+    def extract_feat(self, imgs):
+        raise NotImplementedError('This method is not implemented.')
+
+    def encode_decode(self, img, img_metas):
+        raise NotImplementedError('This method is not implemented.')
+
+    def forward_train(self, imgs, img_metas, **kwargs):
+        raise NotImplementedError('This method is not implemented.')
+
+    def simple_test(self, img, img_meta, **kwargs):
+        device_type = 'cpu' if self.device_id is None else 'cuda'
+        device_id = self.device_id if self.device_id is not None else 0
+        if device_type == 'cpu':
+            img = img.cpu()
+        self.io_binding.bind_input(
+            name='input',
+            device_type=device_type,
+            device_id=device_id,
+            element_type=np.float32,
+            shape=img.shape,
+            buffer_ptr=img.data_ptr())
+        for name in self.output_names:
+            self.io_binding.bind_output(name)
+        self.sess.run_with_iobinding(self.io_binding)
+        seg_pred = self.io_binding.copy_outputs_to_cpu()[0]
+        if self.test_mode == 'whole':
+            # whole might support dynamic reshape
+            scale_factor = img_meta[0]['scale_factor']
+            if isinstance(scale_factor, float):
+                scale_factor = (scale_factor, ) * 2
+            scale_factor = [1. / scale for scale in scale_factor]
+            if not torch.all(torch.tensor(scale_factor) == 1.):
+                seg_pred = torch.from_numpy(seg_pred).float()
+                seg_pred = torch.nn.functional.interpolate(
+                    seg_pred,
+                    scale_factor=tuple(scale_factor[:2]),
+                    mode='nearest')
+                seg_pred = seg_pred.long().detach().cpu().numpy()
+        seg_pred = seg_pred[0]
+        seg_pred = list(seg_pred)
+        return seg_pred
+
+    def aug_test(self, imgs, img_metas, **kwargs):
+        raise NotImplementedError('This method is not implemented.')
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='mmseg onnxruntime backend test (and eval) a model')
+    parser.add_argument('config', help='test config file path')
+    parser.add_argument('model', help='Input model file')
+    parser.add_argument('--out', help='output result file in pickle format')
+    parser.add_argument(
+        '--format-only',
+        action='store_true',
+        help='Format the output results without perform evaluation. It is'
+        'useful when you want to format the result to a specific format and '
+        'submit it to the test server')
+    parser.add_argument(
+        '--eval',
+        type=str,
+        nargs='+',
+        help='evaluation metrics, which depends on the dataset, e.g., "mIoU"'
+        ' for generic datasets, and "cityscapes" for Cityscapes')
+    parser.add_argument('--show', action='store_true', help='show results')
+    parser.add_argument(
+        '--show-dir', help='directory where painted images will be saved')
+    parser.add_argument(
+        '--options', nargs='+', action=DictAction, help='custom options')
+    parser.add_argument(
+        '--eval-options',
+        nargs='+',
+        action=DictAction,
+        help='custom options for evaluation')
+    parser.add_argument(
+        '--opacity',
+        type=float,
+        default=0.5,
+        help='Opacity of painted segmentation map. In (0, 1] range.')
+    parser.add_argument('--cpu', action='store_true', help='test use cpu')
+    parser.add_argument('--local_rank', type=int, default=0)
+    args = parser.parse_args()
+    if 'LOCAL_RANK' not in os.environ:
+        os.environ['LOCAL_RANK'] = str(args.local_rank)
+    return args
+
+
+def main():
+    args = parse_args()
+
+    assert args.out or args.eval or args.format_only or args.show \
+        or args.show_dir, \
+        ('Please specify at least one operation (save/eval/format/show the '
+         'results / save the results) with the argument "--out", "--eval"'
+         ', "--format-only", "--show" or "--show-dir"')
+
+    if args.eval and args.format_only:
+        raise ValueError('--eval and --format_only cannot be both specified')
+
+    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
+        raise ValueError('The output file must be a pkl file.')
+
+    cfg = mmcv.Config.fromfile(args.config)
+    if args.options is not None:
+        cfg.merge_from_dict(args.options)
+    cfg.model.pretrained = None
+    cfg.data.test.test_mode = True
+
+    # init distributed env first, since logger depends on the dist info.
+    distributed = False
+
+    # build the dataloader
+    # TODO: support multiple images per gpu (only minor changes are needed)
+    device_id = 0 if not args.cpu else None
+    dataset = build_dataset(cfg.data.test)
+    data_loader = build_dataloader(
+        dataset,
+        samples_per_gpu=1,
+        workers_per_gpu=cfg.data.workers_per_gpu,
+        dist=distributed,
+        shuffle=False)
+
+    # load onnx config and meta
+    cfg.model.train_cfg = None
+    model = ONNXRuntimeDetector(args.model, cfg=cfg, device_id=device_id)
+    model.CLASSES = dataset.CLASSES
+    model.PALETTE = dataset.PALETTE
+
+    efficient_test = False
+    if args.eval_options is not None:
+        efficient_test = args.eval_options.get('efficient_test', False)
+
+    model = MMDataParallel(
+        model, device_ids=[device_id] if device_id is not None else None)
+    outputs = single_gpu_test(model, data_loader, args.show, args.show_dir,
+                              efficient_test, args.opacity)
+
+    rank, _ = get_dist_info()
+    if rank == 0:
+        if args.out:
+            print(f'\nwriting results to {args.out}')
+            mmcv.dump(outputs, args.out)
+        kwargs = {} if args.eval_options is None else args.eval_options
+        if args.format_only:
+            dataset.format_results(outputs, **kwargs)
+        if args.eval:
+            dataset.evaluate(outputs, args.eval, **kwargs)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/pytorch2onnx.py b/tools/pytorch2onnx.py
index 71f1bb7227..51db279e1b 100644
--- a/tools/pytorch2onnx.py
+++ b/tools/pytorch2onnx.py
@@ -128,6 +128,7 @@ def pytorch2onnx(model,
             Default: False.
     """
     model.cpu().eval()
+    test_mode = model.test_cfg.mode
 
     if isinstance(model.decode_head, nn.ModuleList):
         num_classes = model.decode_head[-1].num_classes
@@ -140,7 +141,9 @@ def pytorch2onnx(model,
 
     img_list = [img[None, :] for img in imgs]
     img_meta_list = [[img_meta] for img_meta in img_metas]
-    img_list, img_meta_list = _update_input_img(img_list, img_meta_list)
+    # update img_meta
+    if test_mode == 'whole':
+        img_list, img_meta_list = _update_input_img(img_list, img_meta_list)
 
     # replace original forward function
     origin_forward = model.forward
@@ -148,18 +151,21 @@ def pytorch2onnx(model,
         model.forward, img_metas=img_meta_list, return_loss=False)
     dynamic_axes = None
     if dynamic_export:
-        dynamic_axes = {
-            'input': {
-                0: 'batch',
-                2: 'height',
-                3: 'width'
-            },
-            'output': {
-                1: 'batch',
-                2: 'height',
-                3: 'width'
+        if test_mode == 'slide':
+            dynamic_axes = {'input': {0: 'batch'}, 'output': {1: 'batch'}}
+        else:
+            dynamic_axes = {
+                'input': {
+                    0: 'batch',
+                    2: 'height',
+                    3: 'width'
+                },
+                'output': {
+                    1: 'batch',
+                    2: 'height',
+                    3: 'width'
+                }
             }
-        }
 
     register_extra_symbolics(opset_version)
     with torch.no_grad():
@@ -184,10 +190,11 @@ def pytorch2onnx(model,
 
         if dynamic_export:
             # scale image for dynamic shape test
-            img_list = [
-                nn.functional.interpolate(_, scale_factor=1.5)
-                for _ in img_list
-            ]
+            if test_mode == 'whole':
+                img_list = [
+                    nn.functional.interpolate(_, scale_factor=1.5)
+                    for _ in img_list
+                ]
             # concate flip image for batch test
             flip_img_list = [_.flip(-1) for _ in img_list]
             img_list = [
@@ -196,8 +203,9 @@ def pytorch2onnx(model,
             ]
 
             # update img_meta
-            img_list, img_meta_list = _update_input_img(
-                img_list, img_meta_list)
+            if test_mode == 'whole':
+                img_list, img_meta_list = _update_input_img(
+                    img_list, img_meta_list)
 
         # check the numerical value
         # get pytorch output
@@ -309,6 +317,8 @@ def parse_args():
         cfg.merge_from_dict(args.cfg_options)
     cfg.model.pretrained = None
 
+    test_mode = cfg.model.test_cfg.mode
+
     # build the model and load checkpoint
     cfg.model.train_cfg = None
     segmentor = build_segmentor(
@@ -324,8 +334,12 @@ def parse_args():
 
     # read input or create dummpy input
     if args.input_img is not None:
+        preprocess_shape = (input_shape[3], input_shape[2])
+        if test_mode == 'slide':
+            # slide mode does not support dynamic shape
+            preprocess_shape = None
         mm_inputs = _prepare_input_img(args.input_img, cfg.data.test.pipeline,
-                                       (input_shape[3], input_shape[2]))
+                                       preprocess_shape)
     else:
         if isinstance(segmentor.decode_head, nn.ModuleList):
             num_classes = segmentor.decode_head[-1].num_classes

From 2284a0976860155ab9bb56bf0d525be2461e1a16 Mon Sep 17 00:00:00 2001
From: grimoire <streetyao@live.com>
Date: Tue, 20 Apr 2021 07:44:07 +0800
Subject: [PATCH 2/9] onnx convert with custom output shape, update test code

---
 tools/ort_test.py     | 23 +++++++----------
 tools/pytorch2onnx.py | 60 ++++++++++++++++++++++++-------------------
 2 files changed, 43 insertions(+), 40 deletions(-)

diff --git a/tools/ort_test.py b/tools/ort_test.py
index 580e7175e1..da04a1d8cd 100644
--- a/tools/ort_test.py
+++ b/tools/ort_test.py
@@ -33,7 +33,7 @@ def __init__(self, onnx_file, cfg, device_id=None):
             session_options.register_custom_ops_library(ort_custom_op_path)
         sess = ort.InferenceSession(onnx_file, session_options)
         if device_id is not None:
-            option = {'device_id': device_id, 'cuda_mem_limit': 1 << 30}
+            option = {'device_id': device_id}
             sess.set_providers(
                 ['CUDAExecutionProvider', 'CPUExecutionProvider'],
                 [option, {}])
@@ -72,19 +72,14 @@ def simple_test(self, img, img_meta, **kwargs):
             self.io_binding.bind_output(name)
         self.sess.run_with_iobinding(self.io_binding)
         seg_pred = self.io_binding.copy_outputs_to_cpu()[0]
-        if self.test_mode == 'whole':
-            # whole might support dynamic reshape
-            scale_factor = img_meta[0]['scale_factor']
-            if isinstance(scale_factor, float):
-                scale_factor = (scale_factor, ) * 2
-            scale_factor = [1. / scale for scale in scale_factor]
-            if not torch.all(torch.tensor(scale_factor) == 1.):
-                seg_pred = torch.from_numpy(seg_pred).float()
-                seg_pred = torch.nn.functional.interpolate(
-                    seg_pred,
-                    scale_factor=tuple(scale_factor[:2]),
-                    mode='nearest')
-                seg_pred = seg_pred.long().detach().cpu().numpy()
+        # whole might support dynamic reshape
+        ori_shape = img_meta[0]['ori_shape']
+        if not (ori_shape[0] == seg_pred.shape[-2]
+                and ori_shape[1] == seg_pred.shape[-1]):
+            seg_pred = torch.from_numpy(seg_pred).float()
+            seg_pred = torch.nn.functional.interpolate(
+                seg_pred, size=tuple(ori_shape[:2]), mode='nearest')
+            seg_pred = seg_pred.long().detach().cpu().numpy()
         seg_pred = seg_pred[0]
         seg_pred = list(seg_pred)
         return seg_pred
diff --git a/tools/pytorch2onnx.py b/tools/pytorch2onnx.py
index 51db279e1b..e09fa5d486 100644
--- a/tools/pytorch2onnx.py
+++ b/tools/pytorch2onnx.py
@@ -73,8 +73,6 @@ def _demo_mm_inputs(input_shape, num_classes):
 
 def _prepare_input_img(img_path, test_pipeline, shape=None):
     # build the data pipeline
-    if shape is not None:
-        test_pipeline[1]['img_scale'] = shape
     test_pipeline[1]['transforms'][0]['keep_ratio'] = False
     test_pipeline = [LoadImage()] + test_pipeline[1:]
     test_pipeline = Compose(test_pipeline)
@@ -84,6 +82,10 @@ def _prepare_input_img(img_path, test_pipeline, shape=None):
     imgs = data['img']
     img_metas = [i.data for i in data['img_metas']]
 
+    if shape is not None:
+        for img_meta in img_metas:
+            img_meta['ori_shape'] = tuple(shape) + (3, )
+
     mm_inputs = {'imgs': imgs, 'img_metas': img_metas}
 
     return mm_inputs
@@ -91,15 +93,24 @@ def _prepare_input_img(img_path, test_pipeline, shape=None):
 
 def _update_input_img(img_list, img_meta_list):
     # update img and its meta list
-    N, C, H, W = img_list[0].shape
+    N = img_list[0].size(0)
     img_meta = img_meta_list[0][0]
+    img_shape = img_meta['img_shape']
+    ori_shape = img_meta['ori_shape']
+    pad_shape = img_meta['pad_shape']
     new_img_meta_list = [[{
-        'img_shape': (H, W, C),
-        'ori_shape': (H, W, C),
-        'pad_shape': (H, W, C),
-        'filename': img_meta['filename'],
-        'scale_factor': 1.,
-        'flip': False,
+        'img_shape':
+        img_shape,
+        'ori_shape':
+        ori_shape,
+        'pad_shape':
+        pad_shape,
+        'filename':
+        img_meta['filename'],
+        'scale_factor':
+        (img_shape[1] / ori_shape[1], img_shape[0] / ori_shape[0]) * 2,
+        'flip':
+        False,
     } for _ in range(N)]]
 
     return img_list, new_img_meta_list
@@ -142,13 +153,15 @@ def pytorch2onnx(model,
     img_list = [img[None, :] for img in imgs]
     img_meta_list = [[img_meta] for img_meta in img_metas]
     # update img_meta
-    if test_mode == 'whole':
-        img_list, img_meta_list = _update_input_img(img_list, img_meta_list)
+    img_list, img_meta_list = _update_input_img(img_list, img_meta_list)
 
     # replace original forward function
     origin_forward = model.forward
     model.forward = partial(
-        model.forward, img_metas=img_meta_list, return_loss=False)
+        model.forward,
+        img_metas=img_meta_list,
+        return_loss=False,
+        rescale=True)
     dynamic_axes = None
     if dynamic_export:
         if test_mode == 'slide':
@@ -188,13 +201,12 @@ def pytorch2onnx(model,
         onnx_model = onnx.load(output_file)
         onnx.checker.check_model(onnx_model)
 
-        if dynamic_export:
+        if dynamic_export and test_mode == 'whole':
             # scale image for dynamic shape test
-            if test_mode == 'whole':
-                img_list = [
-                    nn.functional.interpolate(_, scale_factor=1.5)
-                    for _ in img_list
-                ]
+            img_list = [
+                nn.functional.interpolate(_, scale_factor=1.5)
+                for _ in img_list
+            ]
             # concate flip image for batch test
             flip_img_list = [_.flip(-1) for _ in img_list]
             img_list = [
@@ -203,9 +215,8 @@ def pytorch2onnx(model,
             ]
 
             # update img_meta
-            if test_mode == 'whole':
-                img_list, img_meta_list = _update_input_img(
-                    img_list, img_meta_list)
+            img_list, img_meta_list = _update_input_img(
+                img_list, img_meta_list)
 
         # check the numerical value
         # get pytorch output
@@ -280,7 +291,7 @@ def parse_args():
         type=int,
         nargs='+',
         default=[256, 256],
-        help='input image size')
+        help='output image size')
     parser.add_argument(
         '--cfg-options',
         nargs='+',
@@ -334,10 +345,7 @@ def parse_args():
 
     # read input or create dummpy input
     if args.input_img is not None:
-        preprocess_shape = (input_shape[3], input_shape[2])
-        if test_mode == 'slide':
-            # slide mode does not support dynamic shape
-            preprocess_shape = None
+        preprocess_shape = (input_shape[2], input_shape[3])
         mm_inputs = _prepare_input_img(args.input_img, cfg.data.test.pipeline,
                                        preprocess_shape)
     else:

From d706bfb580de427373726c56c1da628b4f5c8794 Mon Sep 17 00:00:00 2001
From: "q.yao" <yaoqian@sensetime.com>
Date: Tue, 20 Apr 2021 13:43:40 +0800
Subject: [PATCH 3/9] update pytorch2onnx, add rescale_shape support, add
 document

---
 docs/useful_tools.md  | 44 ++++++++++++++++++++++++++++++++++-
 tools/ort_test.py     |  7 +++---
 tools/pytorch2onnx.py | 53 +++++++++++++++++++++++++++++++------------
 3 files changed, 85 insertions(+), 19 deletions(-)

diff --git a/docs/useful_tools.md b/docs/useful_tools.md
index 8286af83e5..ee0db92108 100644
--- a/docs/useful_tools.md
+++ b/docs/useful_tools.md
@@ -53,6 +53,7 @@ python tools/pytorch2onnx.py \
     --output-file ${ONNX_FILE} \
     --input-img ${INPUT_IMG} \
     --shape ${INPUT_SHAPE} \
+    --rescale_shape ${RESCALE_SHAPE} \
     --show \
     --verify \
     --dynamic-export \
@@ -66,7 +67,8 @@ Description of arguments:
 - `--checkpoint` : The path of a model checkpoint file.
 - `--output-file`: The path of output ONNX model. If not specified, it will be set to `tmp.onnx`.
 - `--input-img` : The path of an input image for conversion and visualize.
-- `--shape`: The height and width of input tensor to the model. If not specified, it will be set to `256 256`.
+- `--shape`: The height and width of input tensor to the model. If not specified, it will be set to the `img_scale` of the `test_pipeline` in the config.
+- `--rescale_shape`: Rescale shape of the output. Set this value to avoid OOM; it only works in `slide` mode.
 - `--show`: Determines whether to print the architecture of the exported model. If not specified, it will be set to `False`.
 - `--verify`: Determines whether to verify the correctness of an exported model. If not specified, it will be set to `False`.
 - `--dynamic-export`: Determines whether to export ONNX model with dynamic input and output shapes. If not specified, it will be set to `False`.
@@ -74,6 +76,46 @@ Description of arguments:
 
 **Note**: This tool is still experimental. Some customized operators are not supported for now.
 
+### Evaluate ONNX model with ONNXRuntime
+
+We provide `tools/ort_test.py` to evaluate an ONNX model with the ONNXRuntime backend.
+
+#### Prerequisite
+
+- Install onnx and onnxruntime-gpu
+
+  ```shell
+  pip install onnx onnxruntime-gpu
+  ```
+
+#### Usage
+
+```shell
+python tools/ort_test.py \
+    ${CONFIG_FILE} \
+    ${ONNX_FILE} \
+    --out ${OUTPUT_FILE} \
+    --eval ${EVALUATION_METRICS} \
+    --show \
+    --show-dir ${SHOW_DIRECTORY} \
+    --options ${CFG_OPTIONS} \
+    --eval-options ${EVALUATION_OPTIONS} \
+    --opacity ${OPACITY}
+```
+
+Description of all arguments
+
+- `config`: The path of a model config file.
+- `model`: The path of an ONNX model file.
+- `--out`: The path of output result file in pickle format.
+- `--format-only` : Format the output results without performing evaluation. It is useful when you want to format the result to a specific format and submit it to the test server. If not specified, it will be set to `False`. Note that this argument is **mutually exclusive** with `--eval`.
+- `--eval`: Evaluation metrics, which depends on the dataset, e.g., "mIoU" for generic datasets, and "cityscapes" for Cityscapes. Note that this argument is **mutually exclusive** with `--format-only`.
+- `--show`: Show results flag.
+- `--show-dir`: Directory where painted images will be saved
+- `--options`: Override some settings in the used config file, the key-value pair in `xxx=yyy` format will be merged into config file.
+- `--eval-options`: Custom options for evaluation, the key-value pair in `xxx=yyy` format will be kwargs for `dataset.evaluate()` function
+- `--opacity`: Opacity of painted segmentation map. In (0, 1] range.
+
 ## Miscellaneous
 
 ### Print the entire config
diff --git a/tools/ort_test.py b/tools/ort_test.py
index da04a1d8cd..c07eaab6a8 100644
--- a/tools/ort_test.py
+++ b/tools/ort_test.py
@@ -1,11 +1,12 @@
 import argparse
+import os
+import os.path as osp
+import warnings
+
 import mmcv
 import numpy as np
 import onnxruntime as ort
-import os
-import os.path as osp
 import torch
-import warnings
 from mmcv.parallel import MMDataParallel
 from mmcv.runner import get_dist_info
 from mmcv.utils import DictAction
diff --git a/tools/pytorch2onnx.py b/tools/pytorch2onnx.py
index e09fa5d486..7a9fe52091 100644
--- a/tools/pytorch2onnx.py
+++ b/tools/pytorch2onnx.py
@@ -71,8 +71,13 @@ def _demo_mm_inputs(input_shape, num_classes):
     return mm_inputs
 
 
-def _prepare_input_img(img_path, test_pipeline, shape=None):
+def _prepare_input_img(img_path,
+                       test_pipeline,
+                       shape=None,
+                       rescale_shape=None):
     # build the data pipeline
+    if shape is not None:
+        test_pipeline[1]['img_scale'] = shape
     test_pipeline[1]['transforms'][0]['keep_ratio'] = False
     test_pipeline = [LoadImage()] + test_pipeline[1:]
     test_pipeline = Compose(test_pipeline)
@@ -82,9 +87,9 @@ def _prepare_input_img(img_path, test_pipeline, shape=None):
     imgs = data['img']
     img_metas = [i.data for i in data['img_metas']]
 
-    if shape is not None:
+    if rescale_shape is not None:
         for img_meta in img_metas:
-            img_meta['ori_shape'] = tuple(shape) + (3, )
+            img_meta['ori_shape'] = tuple(rescale_shape) + (3, )
 
     mm_inputs = {'imgs': imgs, 'img_metas': img_metas}
 
@@ -148,7 +153,6 @@ def pytorch2onnx(model,
 
     imgs = mm_inputs.pop('imgs')
     img_metas = mm_inputs.pop('img_metas')
-    ori_shape = img_metas[0]['ori_shape']
 
     img_list = [img[None, :] for img in imgs]
     img_meta_list = [[img_meta] for img_meta in img_metas]
@@ -242,6 +246,10 @@ def pytorch2onnx(model,
             if not osp.exists(img):
                 img = imgs[0][:3, ...].permute(1, 2, 0) * 255
                 img = img.detach().numpy().astype(np.uint8)
+                ori_shape = img.shape[:2]
+            else:
+                ori_shape = LoadImage()({'img': img})['ori_shape']
+
             # resize onnx_result to ori_shape
             onnx_result_ = cv2.resize(onnx_result[0].astype(np.uint8),
                                       (ori_shape[1], ori_shape[0]))
@@ -290,8 +298,14 @@ def parse_args():
         '--shape',
         type=int,
         nargs='+',
-        default=[256, 256],
-        help='output image size')
+        default=None,
+        help='input image height and width.')
+    parser.add_argument(
+        '--rescale_shape',
+        type=int,
+        nargs='+',
+        default=None,
+        help='output image rescale height and width, work for slide mode.')
     parser.add_argument(
         '--cfg-options',
         nargs='+',
@@ -313,7 +327,15 @@ def parse_args():
 if __name__ == '__main__':
     args = parse_args()
 
-    if len(args.shape) == 1:
+    cfg = mmcv.Config.fromfile(args.config)
+    if args.cfg_options is not None:
+        cfg.merge_from_dict(args.cfg_options)
+    cfg.model.pretrained = None
+
+    if args.shape is None:
+        img_scale = cfg.test_pipeline[1]['img_scale']
+        input_shape = (1, 3, img_scale[1], img_scale[0])
+    elif len(args.shape) == 1:
         input_shape = (1, 3, args.shape[0], args.shape[0])
     elif len(args.shape) == 2:
         input_shape = (
@@ -323,11 +345,6 @@ def parse_args():
     else:
         raise ValueError('invalid input shape')
 
-    cfg = mmcv.Config.fromfile(args.config)
-    if args.cfg_options is not None:
-        cfg.merge_from_dict(args.cfg_options)
-    cfg.model.pretrained = None
-
     test_mode = cfg.model.test_cfg.mode
 
     # build the model and load checkpoint
@@ -345,9 +362,15 @@ def parse_args():
 
     # read input or create dummpy input
     if args.input_img is not None:
-        preprocess_shape = (input_shape[2], input_shape[3])
-        mm_inputs = _prepare_input_img(args.input_img, cfg.data.test.pipeline,
-                                       preprocess_shape)
+        preprocess_shape = (input_shape[3], input_shape[2])
+        rescale_shape = None
+        if args.rescale_shape is not None:
+            rescale_shape = [args.rescale_shape[1], args.rescale_shape[0]]
+        mm_inputs = _prepare_input_img(
+            args.input_img,
+            cfg.data.test.pipeline,
+            shape=preprocess_shape,
+            rescale_shape=rescale_shape)
     else:
         if isinstance(segmentor.decode_head, nn.ModuleList):
             num_classes = segmentor.decode_head[-1].num_classes

From 193a571051f89a8212e003d9a60bbd19435a778e Mon Sep 17 00:00:00 2001
From: "q.yao" <yaoqian@sensetime.com>
Date: Tue, 20 Apr 2021 14:27:28 +0800
Subject: [PATCH 4/9] update doc for lint error fixing

---
 docs/useful_tools.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/useful_tools.md b/docs/useful_tools.md
index c21f1347f1..e4a8c5502f 100644
--- a/docs/useful_tools.md
+++ b/docs/useful_tools.md
@@ -115,6 +115,7 @@ Description of all arguments
 - `--options`: Override some settings in the used config file, the key-value pair in `xxx=yyy` format will be merged into config file.
 - `--eval-options`: Custom options for evaluation, the key-value pair in `xxx=yyy` format will be kwargs for `dataset.evaluate()` function
 - `--opacity`: Opacity of painted segmentation map. In (0, 1] range.
+
 ### Convert to TorchScript (experimental)
 
 We also provide a script to convert model to [TorchScript](https://pytorch.org/docs/stable/jit.html) format. You can use the pytorch C++ API [LibTorch](https://pytorch.org/docs/stable/cpp_index.html) inference the trained model. The converted model could be visualized by tools like [Netron](https://github.com/lutzroeder/netron). Besides, we also support comparing the output results between Pytorch and TorchScript model.

From 393e393d365ee37d60b25bedfc14ff0c89ed0a03 Mon Sep 17 00:00:00 2001
From: "q.yao" <yaoqian@sensetime.com>
Date: Wed, 21 Apr 2021 19:33:40 +0800
Subject: [PATCH 5/9] remove cpu flag in ort_test.py

---
 tools/ort_test.py | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/tools/ort_test.py b/tools/ort_test.py
index c07eaab6a8..761d0821ce 100644
--- a/tools/ort_test.py
+++ b/tools/ort_test.py
@@ -18,7 +18,7 @@
 
 class ONNXRuntimeDetector(BaseSegmentor):
 
-    def __init__(self, onnx_file, cfg, device_id=None):
+    def __init__(self, onnx_file, cfg, device_id):
         super(ONNXRuntimeDetector, self).__init__()
         # get the custom op path
         ort_custom_op_path = ''
@@ -33,13 +33,14 @@ def __init__(self, onnx_file, cfg, device_id=None):
         if osp.exists(ort_custom_op_path):
             session_options.register_custom_ops_library(ort_custom_op_path)
         sess = ort.InferenceSession(onnx_file, session_options)
-        if device_id is not None:
-            option = {'device_id': device_id}
-            sess.set_providers(
-                ['CUDAExecutionProvider', 'CPUExecutionProvider'],
-                [option, {}])
-        else:
-            sess.set_providers(['CPUExecutionProvider'])
+        providers = ['CPUExecutionProvider']
+        options = [{}]
+        is_cuda_available = ort.get_device() == 'GPU'
+        if is_cuda_available:
+            providers.append('CUDAExecutionProvider')
+            options.append({'device_id': device_id})
+
+        sess.set_providers(providers, options)
 
         self.sess = sess
         self.device_id = device_id
@@ -58,14 +59,11 @@ def forward_train(self, imgs, img_metas, **kwargs):
         raise NotImplementedError('This method is not implemented.')
 
     def simple_test(self, img, img_meta, **kwargs):
-        device_type = 'cpu' if self.device_id is None else 'cuda'
-        device_id = self.device_id if self.device_id is not None else 0
-        if device_type == 'cpu':
-            img = img.cpu()
+        device_type = img.device.type
         self.io_binding.bind_input(
             name='input',
             device_type=device_type,
-            device_id=device_id,
+            device_id=self.device_id,
             element_type=np.float32,
             shape=img.shape,
             buffer_ptr=img.data_ptr())
@@ -122,7 +120,6 @@ def parse_args():
         type=float,
         default=0.5,
         help='Opacity of painted segmentation map. In (0, 1] range.')
-    parser.add_argument('--cpu', action='store_true', help='test use cpu')
     parser.add_argument('--local_rank', type=int, default=0)
     args = parser.parse_args()
     if 'LOCAL_RANK' not in os.environ:
@@ -156,7 +153,6 @@ def main():
 
     # build the dataloader
     # TODO: support multiple images per gpu (only minor changes are needed)
-    device_id = 0 if not args.cpu else None
     dataset = build_dataset(cfg.data.test)
     data_loader = build_dataloader(
         dataset,
@@ -167,7 +163,7 @@ def main():
 
     # load onnx config and meta
     cfg.model.train_cfg = None
-    model = ONNXRuntimeDetector(args.model, cfg=cfg, device_id=device_id)
+    model = ONNXRuntimeDetector(args.model, cfg=cfg, device_id=0)
     model.CLASSES = dataset.CLASSES
     model.PALETTE = dataset.PALETTE
 
@@ -175,8 +171,7 @@ def main():
     if args.eval_options is not None:
         efficient_test = args.eval_options.get('efficient_test', False)
 
-    model = MMDataParallel(
-        model, device_ids=[device_id] if device_id is not None else None)
+    model = MMDataParallel(model, device_ids=[0])
     outputs = single_gpu_test(model, data_loader, args.show, args.show_dir,
                               efficient_test, args.opacity)
 

From 4dbc15c503d904ba72d39a6579e74dc89fd776f8 Mon Sep 17 00:00:00 2001
From: "q.yao" <yaoqian@sensetime.com>
Date: Thu, 22 Apr 2021 18:56:34 +0800
Subject: [PATCH 6/9] change class name, fix cuda error

---
 tools/ort_test.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tools/ort_test.py b/tools/ort_test.py
index 761d0821ce..7b573f5438 100644
--- a/tools/ort_test.py
+++ b/tools/ort_test.py
@@ -16,10 +16,10 @@
 from mmseg.models.segmentors.base import BaseSegmentor
 
 
-class ONNXRuntimeDetector(BaseSegmentor):
+class ONNXRuntimeSegmentor(BaseSegmentor):
 
     def __init__(self, onnx_file, cfg, device_id):
-        super(ONNXRuntimeDetector, self).__init__()
+        super(ONNXRuntimeSegmentor, self).__init__()
         # get the custom op path
         ort_custom_op_path = ''
         try:
@@ -37,8 +37,10 @@ def __init__(self, onnx_file, cfg, device_id):
         options = [{}]
         is_cuda_available = ort.get_device() == 'GPU'
         if is_cuda_available:
-            providers.append('CUDAExecutionProvider')
-            options.append({'device_id': device_id})
+            # providers.append('CUDAExecutionProvider')
+            # options.append({'device_id': device_id})
+            providers.insert(0, 'CUDAExecutionProvider')
+            options.insert(0, {'device_id': device_id})
 
         sess.set_providers(providers, options)
 
@@ -163,7 +165,7 @@ def main():
 
     # load onnx config and meta
     cfg.model.train_cfg = None
-    model = ONNXRuntimeDetector(args.model, cfg=cfg, device_id=0)
+    model = ONNXRuntimeSegmentor(args.model, cfg=cfg, device_id=0)
     model.CLASSES = dataset.CLASSES
     model.PALETTE = dataset.PALETTE
 

From 42d1276f7acbb5e23c2d4665007b73a2bd4f2e7e Mon Sep 17 00:00:00 2001
From: "q.yao" <yaoqian@sensetime.com>
Date: Thu, 22 Apr 2021 19:02:21 +0800
Subject: [PATCH 7/9] remove comment

---
 tools/ort_test.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/ort_test.py b/tools/ort_test.py
index 7b573f5438..314e9c0a47 100644
--- a/tools/ort_test.py
+++ b/tools/ort_test.py
@@ -37,8 +37,6 @@ def __init__(self, onnx_file, cfg, device_id):
         options = [{}]
         is_cuda_available = ort.get_device() == 'GPU'
         if is_cuda_available:
-            # providers.append('CUDAExecutionProvider')
-            # options.append({'device_id': device_id})
             providers.insert(0, 'CUDAExecutionProvider')
             options.insert(0, {'device_id': device_id})
 

From 2360e0e16eb397b19919026db7ffe36c80ec0ab4 Mon Sep 17 00:00:00 2001
From: "q.yao" <yaoqian@sensetime.com>
Date: Tue, 27 Apr 2021 09:32:16 +0800
Subject: [PATCH 8/9] fix bug of torch2onnx

---
 docs/useful_tools.md  | 9 +++++++++
 tools/ort_test.py     | 4 ++--
 tools/pytorch2onnx.py | 6 +++---
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/docs/useful_tools.md b/docs/useful_tools.md
index e4a8c5502f..335ca51467 100644
--- a/docs/useful_tools.md
+++ b/docs/useful_tools.md
@@ -116,6 +116,15 @@ Description of all arguments
 - `--eval-options`: Custom options for evaluation, the key-value pair in `xxx=yyy` format will be kwargs for `dataset.evaluate()` function
 - `--opacity`: Opacity of painted segmentation map. In (0, 1] range.
 
+#### Results and Models
+
+|   Model    |                     Config                     |  Dataset   | Metric | PyTorch | ONNXRuntime |
+| :--------: | :--------------------------------------------: | :--------: | :----: | :-----: | :---------: |
+|    FCN     |     fcn_r50-d8_512x1024_40k_cityscapes.py      | cityscapes |  mIOU  |  72.2   |    72.2     |
+|   PSPNet   |    pspnet_r50-d8_769x769_40k_cityscapes.py     | cityscapes |  mIOU  |  78.2   |    78.1     |
+| deeplabv3  |   deeplabv3_r50-d8_769x769_40k_cityscapes.py   | cityscapes |  mIOU  |  78.5   |    78.3     |
+| deeplabv3+ | deeplabv3plus_r50-d8_769x769_40k_cityscapes.py | cityscapes |  mIOU  |  78.9   |    78.7     |
+
 ### Convert to TorchScript (experimental)
 
 We also provide a script to convert model to [TorchScript](https://pytorch.org/docs/stable/jit.html) format. You can use the pytorch C++ API [LibTorch](https://pytorch.org/docs/stable/cpp_index.html) inference the trained model. The converted model could be visualized by tools like [Netron](https://github.com/lutzroeder/netron). Besides, we also support comparing the output results between Pytorch and TorchScript model.
diff --git a/tools/ort_test.py b/tools/ort_test.py
index 314e9c0a47..807b21272a 100644
--- a/tools/ort_test.py
+++ b/tools/ort_test.py
@@ -46,6 +46,8 @@ def __init__(self, onnx_file, cfg, device_id):
         self.device_id = device_id
         self.io_binding = sess.io_binding()
         self.output_names = [_.name for _ in sess.get_outputs()]
+        for name in self.output_names:
+            self.io_binding.bind_output(name)
         self.cfg = cfg
         self.test_mode = cfg.model.test_cfg.mode
 
@@ -67,8 +69,6 @@ def simple_test(self, img, img_meta, **kwargs):
             element_type=np.float32,
             shape=img.shape,
             buffer_ptr=img.data_ptr())
-        for name in self.output_names:
-            self.io_binding.bind_output(name)
         self.sess.run_with_iobinding(self.io_binding)
         seg_pred = self.io_binding.copy_outputs_to_cpu()[0]
         # whole might support dynamic reshape
diff --git a/tools/pytorch2onnx.py b/tools/pytorch2onnx.py
index 7a9fe52091..5660ed9004 100644
--- a/tools/pytorch2onnx.py
+++ b/tools/pytorch2onnx.py
@@ -77,7 +77,7 @@ def _prepare_input_img(img_path,
                        rescale_shape=None):
     # build the data pipeline
     if shape is not None:
-        test_pipeline[1]['img_scale'] = shape
+        test_pipeline[1]['img_scale'] = (shape[1], shape[0])
     test_pipeline[1]['transforms'][0]['keep_ratio'] = False
     test_pipeline = [LoadImage()] + test_pipeline[1:]
     test_pipeline = Compose(test_pipeline)
@@ -362,10 +362,10 @@ def parse_args():
 
     # read input or create dummpy input
     if args.input_img is not None:
-        preprocess_shape = (input_shape[3], input_shape[2])
+        preprocess_shape = (input_shape[2], input_shape[3])
         rescale_shape = None
         if args.rescale_shape is not None:
-            rescale_shape = [args.rescale_shape[1], args.rescale_shape[0]]
+            rescale_shape = [args.rescale_shape[0], args.rescale_shape[1]]
         mm_inputs = _prepare_input_img(
             args.input_img,
             cfg.data.test.pipeline,

From 61071d95a7b2d9b61fc9dec856cf12c3c30d12d9 Mon Sep 17 00:00:00 2001
From: "q.yao" <streetyao@live.com>
Date: Wed, 28 Apr 2021 22:34:07 +0800
Subject: [PATCH 9/9] mIOU to mIoU

---
 docs/useful_tools.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/useful_tools.md b/docs/useful_tools.md
index 335ca51467..69a9555233 100644
--- a/docs/useful_tools.md
+++ b/docs/useful_tools.md
@@ -120,10 +120,10 @@ Description of all arguments
 
 |   Model    |                     Config                     |  Dataset   | Metric | PyTorch | ONNXRuntime |
 | :--------: | :--------------------------------------------: | :--------: | :----: | :-----: | :---------: |
-|    FCN     |     fcn_r50-d8_512x1024_40k_cityscapes.py      | cityscapes |  mIOU  |  72.2   |    72.2     |
-|   PSPNet   |    pspnet_r50-d8_769x769_40k_cityscapes.py     | cityscapes |  mIOU  |  78.2   |    78.1     |
-| deeplabv3  |   deeplabv3_r50-d8_769x769_40k_cityscapes.py   | cityscapes |  mIOU  |  78.5   |    78.3     |
-| deeplabv3+ | deeplabv3plus_r50-d8_769x769_40k_cityscapes.py | cityscapes |  mIOU  |  78.9   |    78.7     |
+|    FCN     |     fcn_r50-d8_512x1024_40k_cityscapes.py      | cityscapes |  mIoU  |  72.2   |    72.2     |
+|   PSPNet   |    pspnet_r50-d8_769x769_40k_cityscapes.py     | cityscapes |  mIoU  |  78.2   |    78.1     |
+| deeplabv3  |   deeplabv3_r50-d8_769x769_40k_cityscapes.py   | cityscapes |  mIoU  |  78.5   |    78.3     |
+| deeplabv3+ | deeplabv3plus_r50-d8_769x769_40k_cityscapes.py | cityscapes |  mIoU  |  78.9   |    78.7     |
 
 ### Convert to TorchScript (experimental)