From 01640b2462cdbe48e0f53efc93bfd8cb852d2d9c Mon Sep 17 00:00:00 2001 From: "liuxin.ai" Date: Mon, 9 Nov 2020 18:56:40 +0800 Subject: [PATCH 01/15] add pad_to_tensorcore & legalize for dense/bmm/conv2d --- python/tvm/relay/op/nn/_nn.py | 42 ++++++++++++++++++++++ python/tvm/topi/cuda/__init__.py | 1 + python/tvm/topi/cuda/conv2d_alter_op.py | 48 +++++++++++++++++++++++++ python/tvm/topi/nn/batch_matmul.py | 23 ++++++++++++ python/tvm/topi/nn/dense.py | 23 ++++++++++++ 5 files changed, 137 insertions(+) diff --git a/python/tvm/relay/op/nn/_nn.py b/python/tvm/relay/op/nn/_nn.py index c235f87d1e99..42cbbfc41673 100644 --- a/python/tvm/relay/op/nn/_nn.py +++ b/python/tvm/relay/op/nn/_nn.py @@ -45,6 +45,27 @@ reg.register_pattern("nn.log_softmax", OpPattern.OPAQUE) +@reg.register_legalize("nn.dense") +def legalize_dense(attrs, inputs, types): + """Legalize conv2d op. + + Parameters + ---------- + attrs : tvm.ir.Attrs + Attributes of current convolution + inputs : list of tvm.relay.Expr + The args of the Relay expr to be legalized + types : list of types + List of input and output types + + Returns + ------- + result : tvm.relay.Expr + The legalized expr + """ + return topi.nn.dense_legalize(attrs, inputs, types) + + # dense reg.register_strategy("nn.dense", strategy.dense_strategy) reg.register_pattern("nn.dense", reg.OpPattern.OUT_ELEMWISE_FUSABLE) @@ -60,6 +81,27 @@ def compute_fifo_buffer(attrs, inputs, out_type): reg.register_pattern("nn.fifo_buffer", OpPattern.OPAQUE) +@reg.register_legalize("nn.batch_matmul") +def legalize_batch_matmul(attrs, inputs, types): + """Legalize conv2d op. + + Parameters + ---------- + attrs : tvm.ir.Attrs + Attributes of current convolution + inputs : list of tvm.relay.Expr + The args of the Relay expr to be legalized + types : list of types + List of input and output types + + Returns + ------- + result : tvm.relay.Expr + The legalized expr + """ + return topi.nn.batch_matmul_legalize(attrs, inputs, types) + + # batch_matmul reg.register_strategy("nn.batch_matmul", strategy.batch_matmul_strategy) reg.register_pattern("nn.batch_matmul", reg.OpPattern.OUT_ELEMWISE_FUSABLE) diff --git a/python/tvm/topi/cuda/__init__.py b/python/tvm/topi/cuda/__init__.py index 3ff544f4bb3e..5770b4dc7198 100644 --- a/python/tvm/topi/cuda/__init__.py +++ b/python/tvm/topi/cuda/__init__.py @@ -54,3 +54,4 @@ from .conv2d_hwnc_tensorcore import * from .correlation import * from .sparse import * +from . import tensorcore_alter_op diff --git a/python/tvm/topi/cuda/conv2d_alter_op.py b/python/tvm/topi/cuda/conv2d_alter_op.py index 609ead3e6398..4e228e48c2ce 100644 --- a/python/tvm/topi/cuda/conv2d_alter_op.py +++ b/python/tvm/topi/cuda/conv2d_alter_op.py @@ -26,8 +26,10 @@ from .. import nn from ..utils import get_const_tuple from .conv2d_winograd import _infer_tile_size +from .tensorcore_alter_op import pad_to_tensorcore from ..nn import conv2d_legalize + logger = logging.getLogger("topi") @@ -325,4 +327,50 @@ def _conv2d_legalize(attrs, inputs, arg_types): else: out = relay.nn.conv2d(data, kernel, **new_attrs) return out + elif data_dtype in ['float16', 'float32']: + if data_layout == 'NHWC' and kernel_layout == "HWIO": + batch = data_tensor.shape[0].value + in_channel = data_tensor.shape[3].value + out_channel = kernel_tensor.shape[3].value + + if ((batch % 8 == 0 and in_channel % 16 == 0 and out_channel % 32 == 0) or \ + (batch % 16 == 0 and in_channel % 16 == 0 and out_channel % 16 == 0) or \ + (batch % 32 == 0 and in_channel % 16 == 0 and out_channel % 8 == 0)): + # no need to pad + return None + + (db, di, do), extra_flops = pad_to_tensorcore(batch, in_channel, out_channel) + + if extra_flops > 2: + logger.info("conv2d pad_to_tensorcore skipped, extra_flops %s" % extra_flops) + return None + + logger.info("conv2d pad_to_tensorcore, extra_flops %s" % extra_flops) + + # Pad batch size + if db != 0: + data = relay.nn.pad(data, pad_width=((0, db), (0, 0), (0, 0), (0, 0))) + + # Pad input channel + if di != 0: + data = relay.nn.pad(data, pad_width=((0, 0), (0, 0), (0, 0), (0, di))) + kernel = relay.nn.pad(kernel, pad_width=((0, 0), (0, 0), (0, di), (0, 0))) + + # Pad output channel + if do != 0: + kernel = relay.nn.pad(kernel, pad_width=((0, 0), (0, 0), (0, 0), (0, do))) + + if do != 0: + new_out_channel = out_channel + do + new_attrs['channels'] = new_out_channel + out = tvm.relay.nn.conv2d(data, kernel, **new_attrs) + else: + out = relay.nn.conv2d(data, kernel, **new_attrs) + + if db != 0 or do != 0: + original_out_shape = [x.value for x in output_tensor.shape] + out = relay.strided_slice(out, begin=relay.const([0, 0, 0, 0]), + end=relay.const(original_out_shape)) + + return out return None diff --git a/python/tvm/topi/nn/batch_matmul.py b/python/tvm/topi/nn/batch_matmul.py index 6e60f27eab5d..2ef00ce200ee 100644 --- a/python/tvm/topi/nn/batch_matmul.py +++ b/python/tvm/topi/nn/batch_matmul.py @@ -16,6 +16,7 @@ # under the License. """Binary Neural Network (BNN) Operators""" # pylint: disable=invalid-name +import tvm from tvm import te from ..utils import get_const_tuple @@ -59,3 +60,25 @@ def batch_matmul(x, y, oshape=None): lambda b, i, j: te.sum(x[b if XB != 1 else 0, i, k] * y[b if YB != 1 else 0, j, k], axis=k), tag="batch_matmul", ) + + +@tvm.target.generic_func +def batch_matmul_legalize(attrs, inputs, types): + """Legalizes Conv2D op. + + Parameters + ---------- + attrs : tvm.ir.Attrs + Attributes of current convolution + inputs : list of tvm.relay.Expr + The args of the Relay expr to be legalized + types : list of types + List of input and output types + + Returns + ------- + result : tvm.relay.Expr + The legalized expr + """ + # not to change by default + return None diff --git a/python/tvm/topi/nn/dense.py b/python/tvm/topi/nn/dense.py index 0ce0f9ea1299..93068336180f 100644 --- a/python/tvm/topi/nn/dense.py +++ b/python/tvm/topi/nn/dense.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. """TVM operator fully connected compute.""" +import tvm from tvm import te from .. import tag @@ -62,3 +63,25 @@ def dense(data, weight, bias=None, out_dtype=None): tag=tag.BROADCAST, ) return matmul + + +@tvm.target.generic_func +def dense_legalize(attrs, inputs, types): + """Legalizes Conv2D op. + + Parameters + ---------- + attrs : tvm.ir.Attrs + Attributes of current convolution + inputs : list of tvm.relay.Expr + The args of the Relay expr to be legalized + types : list of types + List of input and output types + + Returns + ------- + result : tvm.relay.Expr + The legalized expr + """ + # not to change by default + return None From 6a3271d8e723a00c0175db957d3e8c535f7f801f Mon Sep 17 00:00:00 2001 From: "liuxin.ai" Date: Mon, 9 Nov 2020 21:03:10 +0800 Subject: [PATCH 02/15] fix pad & slice --- python/tvm/relay/op/transform.py | 4 +- python/tvm/topi/cuda/conv2d_alter_op.py | 3 +- python/tvm/topi/cuda/tensorcore_alter_op.py | 192 ++++++++++++++++++++ 3 files changed, 195 insertions(+), 4 deletions(-) create mode 100644 python/tvm/topi/cuda/tensorcore_alter_op.py diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index a3f97392e36e..7455fb2be04a 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -21,7 +21,7 @@ from . import _make from .dyn import _make as _dyn_make from .tensor import shape_of -from ..expr import TupleWrapper, const, Expr, Tuple +from ..expr import TupleWrapper, const, Expr, Tuple, Constant from ...tir import expr as _expr @@ -860,7 +860,7 @@ def strided_slice(data, begin, end, strides=None, slice_mode="end"): The computed result. """ strides = strides or [1] - if isinstance(begin, Expr) or isinstance(end, Expr) or isinstance(strides, Expr): + if any([(isinstance(i, Expr) and not isinstance(i, Constant)) for i in (begin, end, strides)]): if isinstance(begin, (tuple, list)): begin = const(list(begin)) if isinstance(end, (tuple, list)): diff --git a/python/tvm/topi/cuda/conv2d_alter_op.py b/python/tvm/topi/cuda/conv2d_alter_op.py index 4e228e48c2ce..906b146b0259 100644 --- a/python/tvm/topi/cuda/conv2d_alter_op.py +++ b/python/tvm/topi/cuda/conv2d_alter_op.py @@ -369,8 +369,7 @@ def _conv2d_legalize(attrs, inputs, arg_types): if db != 0 or do != 0: original_out_shape = [x.value for x in output_tensor.shape] - out = relay.strided_slice(out, begin=relay.const([0, 0, 0, 0]), - end=relay.const(original_out_shape)) + out = relay.strided_slice(out, begin=[0, 0, 0, 0], end=original_out_shape) return out return None diff --git a/python/tvm/topi/cuda/tensorcore_alter_op.py b/python/tvm/topi/cuda/tensorcore_alter_op.py new file mode 100644 index 000000000000..a21a46e5e029 --- /dev/null +++ b/python/tvm/topi/cuda/tensorcore_alter_op.py @@ -0,0 +1,192 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name,unused-variable,unused-argument +"""Tensorcore alter op and legalize functions for cuda backend""" + +import logging +import tvm +from tvm import te +from tvm import relay +from tvm import autotvm +import math + +from .. import nn +from ..utils import get_const_tuple + +logger = logging.getLogger('topi') + + +@nn.batch_matmul_legalize.register("cuda") +def _batch_matmul_legalize(attrs, inputs, arg_types): + """Legalizes Conv2D op. + + Parameters + ---------- + attrs : tvm.ir.Attrs + Attributes of current convolution + inputs : list of tvm.relay.Expr + The args of the Relay expr to be legalized + types : list of types + List of input and output types + + Returns + ------- + result : tvm.relay.Expr + The legalized expr + """ + # Collect the input tensors. + x_tensor, y_tensor = arg_types[0], arg_types[1] + dtype = x_tensor.dtype + + # Collect the output tensor. + output_tensor = arg_types[2] + + # Collect the input exprs. + x, y = inputs + + # Pad input and output channels to use tensorcore schedule. + if dtype in ['float16', 'float32']: + B, M, K = x_tensor.shape + B, N, K = y_tensor.shape + M = M.value + K = K.value + N = N.value + + if ((M % 8 == 0 and K % 16 == 0 and N % 32 == 0) or \ + (M % 16 == 0 and K % 16 == 0 and N % 16 == 0) or \ + (M % 32 == 0 and K % 16 == 0 and N % 8 == 0)): + "The shape of (M, K, N) must be multiple of (16, 16, 16) or (32, 16, 8) or (8, 16, 32) for now" + # no need to pad + return None + + # todo: 1. check the padding size 2. pad to 8*16*32/32*16*8 liuxin 2020/7/15 + + (dm, dk, dn), extra_flops = pad_to_tensorcore(M, K, N) + + if extra_flops > 2: + logger.info("batch_matmul pad_to_tensorcore skipped, extra_flops %s" % extra_flops) + return None + + logger.info("batch_matmul pad_to_tensorcore, extra_flops %s" % extra_flops) + + x_ = relay.nn.pad(x, pad_width=((0, 0), (0, dm), (0, dk))) + y_ = relay.nn.pad(y, pad_width=((0, 0), (0, dn), (0, dk))) + out_ = relay.nn.batch_matmul(x_, y_) + original_out_shape = [x.value for x in output_tensor.shape] + out = relay.strided_slice(out_, + begin=[0, 0, 0], + end=original_out_shape) + return out + return None + + +@nn.dense_legalize.register("cuda") +def _dense_legalize(attrs, inputs, arg_types): + """Legalizes Conv2D op. + + Parameters + ---------- + attrs : tvm.ir.Attrs + Attributes of current convolution + inputs : list of tvm.relay.Expr + The args of the Relay expr to be legalized + types : list of types + List of input and output types + + Returns + ------- + result : tvm.relay.Expr + The legalized expr + """ + # Collect the input tensors. + x_tensor, y_tensor = arg_types[0], arg_types[1] + dtype = x_tensor.dtype + + # Collect the output tensor. + output_tensor = arg_types[2] + + # Collect the input exprs. + x, y = inputs + + # Pad input and output channels to use tensorcore schedule. + if dtype in ['float16', 'float32']: + M, K = x_tensor.shape + N, K = y_tensor.shape + try: + M = M.value + K = K.value + N = N.value + except AttributeError: + # todo: deal with unfixed shape when compiling wdl model + return None + + if ((M % 8 == 0 and K % 16 == 0 and N % 32 == 0) or \ + (M % 16 == 0 and K % 16 == 0 and N % 16 == 0) or \ + (M % 32 == 0 and K % 16 == 0 and N % 8 == 0)): + "The shape of (M, K, N) must be multiple of (16, 16, 16) or (32, 16, 8) or (8, 16, 32) for now" + # no need to pad + return None + + (dm, dk, dn), extra_flops = pad_to_tensorcore(M, K, N) + + if extra_flops > 2: + logger.info("dense pad_to_tensorcore skipped, extra_flops %s" % extra_flops) + return None + + logger.info("dense pad_to_tensorcore, extra_flops %s" % extra_flops) + + x_ = relay.nn.pad(x, pad_width=((0, dm), (0, dk))) + y_ = relay.nn.pad(y, pad_width=((0, dn), (0, dk))) + out_ = relay.nn.dense(x_, y_) + original_out_shape = [x.value for x in output_tensor.shape] + out = relay.strided_slice(out_, + begin=[0, 0], + end=original_out_shape) + return out + return None + + +def pad_to_tensorcore(M, K, N): + candidates = [(16, 16, 16), (32, 16, 8), (8, 16, 32)] + + flops = M * K * N + extra_flops = math.inf + best_pad = (0, 0, 0) + for padding in candidates: + dm, dk, dn = _pad_to(M, K, N, padding) + e = dm * (K+dk) * (N+dn) + dk * (N+dn) * (M+dm) + dn * (K+dk) * (M+dm) + # print(dm, dk, dn, e, flops) + if e < extra_flops: + extra_flops = e + best_pad = (dm, dk, dn) + return best_pad, extra_flops / flops + + +def _pad_to(M, K, N, PADDING): + dm, dk, dn = 0, 0, 0 + + if M % PADDING[0] != 0: + M_ = ((M + PADDING[0]) // PADDING[0]) * PADDING[0] + dm = M_ - M + if K % PADDING[1] != 0: + K_ = ((K + PADDING[1]) // PADDING[1]) * PADDING[1] + dk = K_ - K + if N % PADDING[2] != 0: + N_ = ((N + PADDING[2]) // PADDING[2]) * PADDING[2] + dn = N_ - N + + return dm, dk, dn From e5dbf1f015632d05f1d6f30318159f3af77733a5 Mon Sep 17 00:00:00 2001 From: "liuxin.ai" Date: Thu, 19 Nov 2020 13:31:38 +0800 Subject: [PATCH 03/15] fix comments --- python/tvm/relay/op/nn/_nn.py | 4 ++-- python/tvm/topi/cuda/tensorcore_alter_op.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/python/tvm/relay/op/nn/_nn.py b/python/tvm/relay/op/nn/_nn.py index 42cbbfc41673..95bf6b3f9bd0 100644 --- a/python/tvm/relay/op/nn/_nn.py +++ b/python/tvm/relay/op/nn/_nn.py @@ -47,7 +47,7 @@ @reg.register_legalize("nn.dense") def legalize_dense(attrs, inputs, types): - """Legalize conv2d op. + """Legalize dense op. Parameters ---------- @@ -83,7 +83,7 @@ def compute_fifo_buffer(attrs, inputs, out_type): @reg.register_legalize("nn.batch_matmul") def legalize_batch_matmul(attrs, inputs, types): - """Legalize conv2d op. + """Legalize batch_matmul op. Parameters ---------- diff --git a/python/tvm/topi/cuda/tensorcore_alter_op.py b/python/tvm/topi/cuda/tensorcore_alter_op.py index a21a46e5e029..271d4bb3d5f2 100644 --- a/python/tvm/topi/cuda/tensorcore_alter_op.py +++ b/python/tvm/topi/cuda/tensorcore_alter_op.py @@ -32,7 +32,7 @@ @nn.batch_matmul_legalize.register("cuda") def _batch_matmul_legalize(attrs, inputs, arg_types): - """Legalizes Conv2D op. + """Legalizes batch_matmul op. Parameters ---------- @@ -40,7 +40,7 @@ def _batch_matmul_legalize(attrs, inputs, arg_types): Attributes of current convolution inputs : list of tvm.relay.Expr The args of the Relay expr to be legalized - types : list of types + arg_types : list of types List of input and output types Returns @@ -96,7 +96,7 @@ def _batch_matmul_legalize(attrs, inputs, arg_types): @nn.dense_legalize.register("cuda") def _dense_legalize(attrs, inputs, arg_types): - """Legalizes Conv2D op. + """Legalizes dense op. Parameters ---------- @@ -141,13 +141,13 @@ def _dense_legalize(attrs, inputs, arg_types): # no need to pad return None - (dm, dk, dn), extra_flops = pad_to_tensorcore(M, K, N) + (dm, dk, dn), extra_flops_ratio = pad_to_tensorcore(M, K, N) - if extra_flops > 2: - logger.info("dense pad_to_tensorcore skipped, extra_flops %s" % extra_flops) + if extra_flops_ratio > 2: + logger.info("dense pad_to_tensorcore skipped, extra_flops_ratio %s" % extra_flops_ratio) return None - logger.info("dense pad_to_tensorcore, extra_flops %s" % extra_flops) + logger.info("dense pad_to_tensorcore, extra_flops_ratio %s" % extra_flops_ratio) x_ = relay.nn.pad(x, pad_width=((0, dm), (0, dk))) y_ = relay.nn.pad(y, pad_width=((0, dn), (0, dk))) @@ -168,7 +168,7 @@ def pad_to_tensorcore(M, K, N): best_pad = (0, 0, 0) for padding in candidates: dm, dk, dn = _pad_to(M, K, N, padding) - e = dm * (K+dk) * (N+dn) + dk * (N+dn) * (M+dm) + dn * (K+dk) * (M+dm) + e = (M + dm) * (N + dn) * (K + dk) - M * N * K # print(dm, dk, dn, e, flops) if e < extra_flops: extra_flops = e From 32706340511882896b8fdcf232cb12e8f56f43da Mon Sep 17 00:00:00 2001 From: "liuxin.ai" Date: Tue, 22 Dec 2020 14:42:19 +0800 Subject: [PATCH 04/15] fix comments --- python/tvm/topi/nn/batch_matmul.py | 4 ++-- python/tvm/topi/nn/dense.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/tvm/topi/nn/batch_matmul.py b/python/tvm/topi/nn/batch_matmul.py index 9fd795807dc4..3cd3c53c3bcb 100644 --- a/python/tvm/topi/nn/batch_matmul.py +++ b/python/tvm/topi/nn/batch_matmul.py @@ -64,12 +64,12 @@ def batch_matmul(x, y, oshape=None): @tvm.target.generic_func def batch_matmul_legalize(attrs, inputs, types): - """Legalizes Conv2D op. + """Legalizes batch_matmul op. Parameters ---------- attrs : tvm.ir.Attrs - Attributes of current convolution + Attributes of current batch_matmul inputs : list of tvm.relay.Expr The args of the Relay expr to be legalized types : list of types diff --git a/python/tvm/topi/nn/dense.py b/python/tvm/topi/nn/dense.py index 93068336180f..348a6098d99f 100644 --- a/python/tvm/topi/nn/dense.py +++ b/python/tvm/topi/nn/dense.py @@ -67,12 +67,12 @@ def dense(data, weight, bias=None, out_dtype=None): @tvm.target.generic_func def dense_legalize(attrs, inputs, types): - """Legalizes Conv2D op. + """Legalizes dense op. Parameters ---------- attrs : tvm.ir.Attrs - Attributes of current convolution + Attributes of current dense inputs : list of tvm.relay.Expr The args of the Relay expr to be legalized types : list of types From 5786ea7225b4101aba1c93e5e0e9927d86e364b0 Mon Sep 17 00:00:00 2001 From: "liuxin.ai" Date: Mon, 11 Jan 2021 14:40:39 +0800 Subject: [PATCH 05/15] resolve conflict --- python/tvm/topi/nn/batch_matmul.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/tvm/topi/nn/batch_matmul.py b/python/tvm/topi/nn/batch_matmul.py index 7b37f4d6121e..7507851fcbc9 100644 --- a/python/tvm/topi/nn/batch_matmul.py +++ b/python/tvm/topi/nn/batch_matmul.py @@ -74,7 +74,6 @@ def batch_matmul(x, y, oshape=None, auto_scheduler_rewritten_layout=""): attrs={"layout_free_placeholders": [y]}, ) -<<<<<<< HEAD @tvm.target.generic_func def batch_matmul_legalize(attrs, inputs, types): @@ -96,9 +95,7 @@ def batch_matmul_legalize(attrs, inputs, types): """ # not to change by default return None -======= if auto_scheduler_rewritten_layout: output = auto_scheduler.rewrite_compute_body(output, auto_scheduler_rewritten_layout) return output ->>>>>>> main From 3653ff68c7e0b896d3ec0405f980a8e56ed8c9cc Mon Sep 17 00:00:00 2001 From: "liuxin.ai" Date: Mon, 11 Jan 2021 14:44:34 +0800 Subject: [PATCH 06/15] resolve conflict --- python/tvm/topi/nn/batch_matmul.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/tvm/topi/nn/batch_matmul.py b/python/tvm/topi/nn/batch_matmul.py index 7507851fcbc9..dccb103fabd5 100644 --- a/python/tvm/topi/nn/batch_matmul.py +++ b/python/tvm/topi/nn/batch_matmul.py @@ -74,6 +74,11 @@ def batch_matmul(x, y, oshape=None, auto_scheduler_rewritten_layout=""): attrs={"layout_free_placeholders": [y]}, ) + if auto_scheduler_rewritten_layout: + output = auto_scheduler.rewrite_compute_body(output, auto_scheduler_rewritten_layout) + + return output + @tvm.target.generic_func def batch_matmul_legalize(attrs, inputs, types): @@ -95,7 +100,3 @@ def batch_matmul_legalize(attrs, inputs, types): """ # not to change by default return None - if auto_scheduler_rewritten_layout: - output = auto_scheduler.rewrite_compute_body(output, auto_scheduler_rewritten_layout) - - return output From ba369f98a7d71353e9194e4f1097a9cab6a2f847 Mon Sep 17 00:00:00 2001 From: "liuxin.ai" Date: Sun, 24 Jan 2021 16:28:46 +0800 Subject: [PATCH 07/15] support only fp16 --- python/tvm/topi/cuda/conv2d_alter_op.py | 2 +- python/tvm/topi/cuda/tensorcore_alter_op.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tvm/topi/cuda/conv2d_alter_op.py b/python/tvm/topi/cuda/conv2d_alter_op.py index 5304a12132e6..54541cfa02ac 100644 --- a/python/tvm/topi/cuda/conv2d_alter_op.py +++ b/python/tvm/topi/cuda/conv2d_alter_op.py @@ -347,7 +347,7 @@ def _conv2d_legalize(attrs, inputs, arg_types): else: out = relay.nn.conv2d(data, kernel, **new_attrs) return out - elif data_dtype in ['float16', 'float32']: + elif data_dtype in ['float16']: # todo: support int8/int4 if data_layout == 'NHWC' and kernel_layout == "HWIO": batch = data_tensor.shape[0].value in_channel = data_tensor.shape[3].value diff --git a/python/tvm/topi/cuda/tensorcore_alter_op.py b/python/tvm/topi/cuda/tensorcore_alter_op.py index 271d4bb3d5f2..fa7a601de3eb 100644 --- a/python/tvm/topi/cuda/tensorcore_alter_op.py +++ b/python/tvm/topi/cuda/tensorcore_alter_op.py @@ -59,7 +59,7 @@ def _batch_matmul_legalize(attrs, inputs, arg_types): x, y = inputs # Pad input and output channels to use tensorcore schedule. - if dtype in ['float16', 'float32']: + if dtype in ['float16']: # todo: support int8/int4 B, M, K = x_tensor.shape B, N, K = y_tensor.shape M = M.value @@ -123,7 +123,7 @@ def _dense_legalize(attrs, inputs, arg_types): x, y = inputs # Pad input and output channels to use tensorcore schedule. - if dtype in ['float16', 'float32']: + if dtype in ['float16']: # todo: support int8/int4 M, K = x_tensor.shape N, K = y_tensor.shape try: From a3a38f3b045bdf708dffe6d04be9d97c4143378f Mon Sep 17 00:00:00 2001 From: "liuxin.ai" Date: Sun, 24 Jan 2021 18:40:37 +0800 Subject: [PATCH 08/15] add tests/python/relay/test_pass_legalize_tensorcore.py --- .../relay/test_pass_legalize_tensorcore.py | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 tests/python/relay/test_pass_legalize_tensorcore.py diff --git a/tests/python/relay/test_pass_legalize_tensorcore.py b/tests/python/relay/test_pass_legalize_tensorcore.py new file mode 100644 index 000000000000..6da3b22ae83e --- /dev/null +++ b/tests/python/relay/test_pass_legalize_tensorcore.py @@ -0,0 +1,107 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Test legalize pass""" +import numpy as np +import tvm +from tvm import te +from tvm import topi +from tvm import relay +from tvm.contrib import graph_runtime +from tvm.relay import transform, analysis +from tvm.relay.testing.temp_op_attr import TempOpAttr + + +def run_opt_pass(expr, passes): + passes = passes if isinstance(passes, list) else [passes] + mod = tvm.IRModule.from_expr(expr) + seq = tvm.transform.Sequential(passes) + with tvm.transform.PassContext(opt_level=3): + mod = seq(mod) + entry = mod["main"] + return entry if isinstance(expr, relay.Function) else entry.body + + +def test_legalize_conv2d(data_shape, kernel_shape, pad_shape, do_pad=True): + """Test directly replacing an operator with a new one""" + batch = data_shape[0] + in_channel = data_shape[3] + out_channel = kernel_shape[3] + out_shape = list(data_shape) + out_shape[3] = out_channel + db, di, do = pad_shape + + def before(): + x = relay.var("x", shape=data_shape, dtype="float16") + weight = relay.var("weight", shape=kernel_shape, dtype="float16") + y = relay.nn.conv2d(x, weight, channels=out_channel, kernel_size=(3, 3), padding=(1, 1), data_layout="NHWC", kernel_layout="HWIO") + y = relay.Function([x, weight], y) + return y + + def legalize_conv2d(attrs, inputs, types): + with tvm.target.Target("cuda"): + return topi.nn.conv2d_legalize(attrs, inputs, types) + + def expected(): + if not do_pad: + return before() + x = relay.var("x", shape=data_shape, dtype="float16") + if db or di: + x_pad = relay.nn.pad(x, pad_width=((0, db), (0, 0), (0, 0), (0, di))) + else: + x_pad = x + weight = relay.var("weight", shape=(kernel_shape), dtype="float16") + if di or do: + weight_pad = relay.nn.pad(weight, pad_width=((0, 0), (0, 0), (0, di), (0, do))) + else: + weight_pad = weight + y_pad = relay.nn.conv2d( + x_pad, + weight=weight_pad, + channels=out_channel+do, + kernel_size=(3, 3), + padding=(1, 1), + data_layout="NHWC", + kernel_layout="HWIO", + ) + if db or do: + y = relay.strided_slice(y_pad, begin=[0, 0, 0, 0], end=out_shape) + else: + y = y_pad + y = relay.Function([x, weight], y) + return y + + with TempOpAttr("nn.conv2d", "FTVMLegalize", legalize_conv2d): + a = before() + a = run_opt_pass(a, transform.Legalize()) + b = run_opt_pass(expected(), transform.InferType()) + assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b) + + +if __name__ == "__main__": + # pad batch + test_legalize_conv2d((7, 16, 16, 64), (3, 3, 64, 64), (1, 0, 0)) + test_legalize_conv2d((3, 16, 16, 64), (3, 3, 64, 64), (5, 0, 0)) + test_legalize_conv2d((2, 16, 16, 64), (3, 3, 64, 64), (0, 0, 0), False) + # pad in_channel + test_legalize_conv2d((8, 16, 16, 63), (3, 3, 63, 64), (0, 1, 0)) + test_legalize_conv2d((8, 16, 16, 33), (3, 3, 33, 64), (0, 15, 0)) + test_legalize_conv2d((8, 16, 16, 13), (3, 3, 13, 64), (0, 3, 0)) + test_legalize_conv2d((8, 16, 16, 1), (3, 3, 1, 64), (0, 0, 0), False) + # pad out_channel + test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 63), (0, 0, 1)) + test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 33), (0, 0, 31)) + test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 1), (0, 0, 0), False) From 625d573e18b1364502d021e28330512b1b5ac5a5 Mon Sep 17 00:00:00 2001 From: "liuxin.ai" Date: Mon, 25 Jan 2021 14:11:17 +0800 Subject: [PATCH 09/15] add tests for legalize tensorcore --- python/tvm/topi/cuda/tensorcore_alter_op.py | 43 ++++-- .../relay/test_pass_legalize_tensorcore.py | 124 +++++++++++++++++- 2 files changed, 148 insertions(+), 19 deletions(-) diff --git a/python/tvm/topi/cuda/tensorcore_alter_op.py b/python/tvm/topi/cuda/tensorcore_alter_op.py index fa7a601de3eb..e9e8d9a2e061 100644 --- a/python/tvm/topi/cuda/tensorcore_alter_op.py +++ b/python/tvm/topi/cuda/tensorcore_alter_op.py @@ -82,14 +82,22 @@ def _batch_matmul_legalize(attrs, inputs, arg_types): return None logger.info("batch_matmul pad_to_tensorcore, extra_flops %s" % extra_flops) - - x_ = relay.nn.pad(x, pad_width=((0, 0), (0, dm), (0, dk))) - y_ = relay.nn.pad(y, pad_width=((0, 0), (0, dn), (0, dk))) + if dm or dk: + x_ = relay.nn.pad(x, pad_width=((0, 0), (0, dm), (0, dk))) + else: + x_ = x + if dn or dk: + y_ = relay.nn.pad(y, pad_width=((0, 0), (0, dn), (0, dk))) + else: + y_ = y out_ = relay.nn.batch_matmul(x_, y_) - original_out_shape = [x.value for x in output_tensor.shape] - out = relay.strided_slice(out_, - begin=[0, 0, 0], - end=original_out_shape) + if dm or dn: + original_out_shape = [x.value for x in output_tensor.shape] + out = relay.strided_slice(out_, + begin=[0, 0, 0], + end=original_out_shape) + else: + out = out_ return out return None @@ -149,13 +157,22 @@ def _dense_legalize(attrs, inputs, arg_types): logger.info("dense pad_to_tensorcore, extra_flops_ratio %s" % extra_flops_ratio) - x_ = relay.nn.pad(x, pad_width=((0, dm), (0, dk))) - y_ = relay.nn.pad(y, pad_width=((0, dn), (0, dk))) + if dm or dk: + x_ = relay.nn.pad(x, pad_width=((0, dm), (0, dk))) + else: + x_ = x + if dn or dk: + y_ = relay.nn.pad(y, pad_width=((0, dn), (0, dk))) + else: + y_ = y out_ = relay.nn.dense(x_, y_) - original_out_shape = [x.value for x in output_tensor.shape] - out = relay.strided_slice(out_, - begin=[0, 0], - end=original_out_shape) + if dm or dn: + original_out_shape = [x.value for x in output_tensor.shape] + out = relay.strided_slice(out_, + begin=[0, 0], + end=original_out_shape) + else: + out = out_ return out return None diff --git a/tests/python/relay/test_pass_legalize_tensorcore.py b/tests/python/relay/test_pass_legalize_tensorcore.py index 6da3b22ae83e..0e44a0e17a4c 100644 --- a/tests/python/relay/test_pass_legalize_tensorcore.py +++ b/tests/python/relay/test_pass_legalize_tensorcore.py @@ -36,9 +36,7 @@ def run_opt_pass(expr, passes): def test_legalize_conv2d(data_shape, kernel_shape, pad_shape, do_pad=True): - """Test directly replacing an operator with a new one""" - batch = data_shape[0] - in_channel = data_shape[3] + """test legalize conv2d to enable tensorcore""" out_channel = kernel_shape[3] out_shape = list(data_shape) out_shape[3] = out_channel @@ -91,17 +89,131 @@ def expected(): assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b) +def test_legalize_dense(data_shape, kernel_shape, pad_shape, do_pad=True): + """test legalize dense to enable tensorcore""" + M, K = data_shape + N, _ = kernel_shape + out_shape = (M, N) + dm, dk, dn = pad_shape + + def before(): + x = relay.var("x", shape=data_shape, dtype="float16") + weight = relay.var("weight", shape=kernel_shape, dtype="float16") + y = relay.nn.dense(x, weight) + y = relay.Function([x, weight], y) + return y + + def legalize_dense(attrs, inputs, types): + with tvm.target.Target("cuda"): + return topi.nn.dense_legalize(attrs, inputs, types) + + def expected(): + if not do_pad: + return before() + x = relay.var("x", shape=data_shape, dtype="float16") + if dm or dk: + x_pad = relay.nn.pad(x, pad_width=((0, dm), (0, dk))) + else: + x_pad = x + weight = relay.var("weight", shape=(kernel_shape), dtype="float16") + if dn or dk: + weight_pad = relay.nn.pad(weight, pad_width=((0, dn), (0, dk))) + else: + weight_pad = weight + y_pad = relay.nn.dense( + x_pad, + weight_pad, + ) + if dm or dn: + y = relay.strided_slice(y_pad, begin=[0, 0], end=out_shape) + else: + y = y_pad + y = relay.Function([x, weight], y) + return y + + with TempOpAttr("nn.dense", "FTVMLegalize", legalize_dense): + a = before() + a = run_opt_pass(a, transform.Legalize()) + b = run_opt_pass(expected(), transform.InferType()) + assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b) + + +def test_legalize_batch_matmul(data_shape, kernel_shape, pad_shape, do_pad=True): + """test legalize dense to enable tensorcore""" + B, M, _ = data_shape + _, N, _ = kernel_shape + out_shape = (B, M, N) + dm, dk, dn = pad_shape + + def before(): + x = relay.var("x", shape=data_shape, dtype="float16") + weight = relay.var("weight", shape=kernel_shape, dtype="float16") + y = relay.nn.batch_matmul(x, weight) + y = relay.Function([x, weight], y) + return y + + def legalize_batch_matmul(attrs, inputs, types): + with tvm.target.Target("cuda"): + return topi.nn.batch_matmul_legalize(attrs, inputs, types) + + def expected(): + if not do_pad: + return before() + x = relay.var("x", shape=data_shape, dtype="float16") + if dm or dk: + x_pad = relay.nn.pad(x, pad_width=((0, 0), (0, dm), (0, dk))) + else: + x_pad = x + weight = relay.var("weight", shape=(kernel_shape), dtype="float16") + if dn or dk: + weight_pad = relay.nn.pad(weight, pad_width=((0, 0), (0, dn), (0, dk))) + else: + weight_pad = weight + y_pad = relay.nn.batch_matmul( + x_pad, + weight_pad, + ) + if dm or dn: + y = relay.strided_slice(y_pad, begin=[0, 0, 0], end=out_shape) + else: + y = y_pad + y = relay.Function([x, weight], y) + return y + + with TempOpAttr("nn.batch_matmul", "FTVMLegalize", legalize_batch_matmul): + a = before() + a = run_opt_pass(a, transform.Legalize()) + b = run_opt_pass(expected(), transform.InferType()) + assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b) + + if __name__ == "__main__": - # pad batch + # conv2d pad batch test_legalize_conv2d((7, 16, 16, 64), (3, 3, 64, 64), (1, 0, 0)) test_legalize_conv2d((3, 16, 16, 64), (3, 3, 64, 64), (5, 0, 0)) test_legalize_conv2d((2, 16, 16, 64), (3, 3, 64, 64), (0, 0, 0), False) - # pad in_channel + # conv2d pad in_channel test_legalize_conv2d((8, 16, 16, 63), (3, 3, 63, 64), (0, 1, 0)) test_legalize_conv2d((8, 16, 16, 33), (3, 3, 33, 64), (0, 15, 0)) test_legalize_conv2d((8, 16, 16, 13), (3, 3, 13, 64), (0, 3, 0)) test_legalize_conv2d((8, 16, 16, 1), (3, 3, 1, 64), (0, 0, 0), False) - # pad out_channel + # conv2d pad out_channel test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 63), (0, 0, 1)) test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 33), (0, 0, 31)) test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 1), (0, 0, 0), False) + # dense + test_legalize_dense((8, 16), (32, 16), (0, 0, 0), False) + test_legalize_dense((7, 16), (32, 16), (1, 0, 0)) + test_legalize_dense((8, 15), (32, 15), (0, 1, 0)) + test_legalize_dense((8, 16), (31, 16), (0, 0, 1)) + test_legalize_dense((7, 15), (31, 15), (1, 1, 1)) + test_legalize_dense((3, 16), (32, 16), (5, 0, 0)) + test_legalize_dense((2, 16), (32, 16), (0, 0, 0), False) + # batch_matmul + test_legalize_batch_matmul((16, 8, 16), (16, 32, 16), (0, 0, 0), False) + test_legalize_batch_matmul((16, 7, 16), (16, 32, 16), (1, 0, 0)) + test_legalize_batch_matmul((16, 8, 15), (16, 32, 15), (0, 1, 0)) + test_legalize_batch_matmul((16, 8, 16), (16, 31, 16), (0, 0, 1)) + test_legalize_batch_matmul((16, 7, 15), (16, 31, 15), (1, 1, 1)) + test_legalize_batch_matmul((16, 3, 16), (16, 32, 16), (5, 0, 0)) + test_legalize_batch_matmul((16, 2, 16), (16, 32, 16), (0, 0, 0), False) From 3184af666a74e17ed18d0a2bb5c66a0fe6054962 Mon Sep 17 00:00:00 2001 From: "liuxin.ai" Date: Mon, 25 Jan 2021 14:16:51 +0800 Subject: [PATCH 10/15] fix pylint --- python/tvm/topi/cuda/conv2d_alter_op.py | 14 +++++---- python/tvm/topi/cuda/tensorcore_alter_op.py | 30 +++++++++---------- .../relay/test_pass_legalize_tensorcore.py | 12 ++++++-- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/python/tvm/topi/cuda/conv2d_alter_op.py b/python/tvm/topi/cuda/conv2d_alter_op.py index 54541cfa02ac..c9f0d90e3895 100644 --- a/python/tvm/topi/cuda/conv2d_alter_op.py +++ b/python/tvm/topi/cuda/conv2d_alter_op.py @@ -347,15 +347,17 @@ def _conv2d_legalize(attrs, inputs, arg_types): else: out = relay.nn.conv2d(data, kernel, **new_attrs) return out - elif data_dtype in ['float16']: # todo: support int8/int4 - if data_layout == 'NHWC' and kernel_layout == "HWIO": + elif data_dtype in ["float16"]: # todo: support int8/int4 + if data_layout == "NHWC" and kernel_layout == "HWIO": batch = data_tensor.shape[0].value in_channel = data_tensor.shape[3].value out_channel = kernel_tensor.shape[3].value - if ((batch % 8 == 0 and in_channel % 16 == 0 and out_channel % 32 == 0) or \ - (batch % 16 == 0 and in_channel % 16 == 0 and out_channel % 16 == 0) or \ - (batch % 32 == 0 and in_channel % 16 == 0 and out_channel % 8 == 0)): + if ( + (batch % 8 == 0 and in_channel % 16 == 0 and out_channel % 32 == 0) + or (batch % 16 == 0 and in_channel % 16 == 0 and out_channel % 16 == 0) + or (batch % 32 == 0 and in_channel % 16 == 0 and out_channel % 8 == 0) + ): # no need to pad return None @@ -382,7 +384,7 @@ def _conv2d_legalize(attrs, inputs, arg_types): if do != 0: new_out_channel = out_channel + do - new_attrs['channels'] = new_out_channel + new_attrs["channels"] = new_out_channel out = tvm.relay.nn.conv2d(data, kernel, **new_attrs) else: out = relay.nn.conv2d(data, kernel, **new_attrs) diff --git a/python/tvm/topi/cuda/tensorcore_alter_op.py b/python/tvm/topi/cuda/tensorcore_alter_op.py index e9e8d9a2e061..0d57af3eef36 100644 --- a/python/tvm/topi/cuda/tensorcore_alter_op.py +++ b/python/tvm/topi/cuda/tensorcore_alter_op.py @@ -27,7 +27,7 @@ from .. import nn from ..utils import get_const_tuple -logger = logging.getLogger('topi') +logger = logging.getLogger("topi") @nn.batch_matmul_legalize.register("cuda") @@ -59,16 +59,18 @@ def _batch_matmul_legalize(attrs, inputs, arg_types): x, y = inputs # Pad input and output channels to use tensorcore schedule. - if dtype in ['float16']: # todo: support int8/int4 + if dtype in ["float16"]: # todo: support int8/int4 B, M, K = x_tensor.shape B, N, K = y_tensor.shape M = M.value K = K.value N = N.value - if ((M % 8 == 0 and K % 16 == 0 and N % 32 == 0) or \ - (M % 16 == 0 and K % 16 == 0 and N % 16 == 0) or \ - (M % 32 == 0 and K % 16 == 0 and N % 8 == 0)): + if ( + (M % 8 == 0 and K % 16 == 0 and N % 32 == 0) + or (M % 16 == 0 and K % 16 == 0 and N % 16 == 0) + or (M % 32 == 0 and K % 16 == 0 and N % 8 == 0) + ): "The shape of (M, K, N) must be multiple of (16, 16, 16) or (32, 16, 8) or (8, 16, 32) for now" # no need to pad return None @@ -93,9 +95,7 @@ def _batch_matmul_legalize(attrs, inputs, arg_types): out_ = relay.nn.batch_matmul(x_, y_) if dm or dn: original_out_shape = [x.value for x in output_tensor.shape] - out = relay.strided_slice(out_, - begin=[0, 0, 0], - end=original_out_shape) + out = relay.strided_slice(out_, begin=[0, 0, 0], end=original_out_shape) else: out = out_ return out @@ -131,7 +131,7 @@ def _dense_legalize(attrs, inputs, arg_types): x, y = inputs # Pad input and output channels to use tensorcore schedule. - if dtype in ['float16']: # todo: support int8/int4 + if dtype in ["float16"]: # todo: support int8/int4 M, K = x_tensor.shape N, K = y_tensor.shape try: @@ -142,9 +142,11 @@ def _dense_legalize(attrs, inputs, arg_types): # todo: deal with unfixed shape when compiling wdl model return None - if ((M % 8 == 0 and K % 16 == 0 and N % 32 == 0) or \ - (M % 16 == 0 and K % 16 == 0 and N % 16 == 0) or \ - (M % 32 == 0 and K % 16 == 0 and N % 8 == 0)): + if ( + (M % 8 == 0 and K % 16 == 0 and N % 32 == 0) + or (M % 16 == 0 and K % 16 == 0 and N % 16 == 0) + or (M % 32 == 0 and K % 16 == 0 and N % 8 == 0) + ): "The shape of (M, K, N) must be multiple of (16, 16, 16) or (32, 16, 8) or (8, 16, 32) for now" # no need to pad return None @@ -168,9 +170,7 @@ def _dense_legalize(attrs, inputs, arg_types): out_ = relay.nn.dense(x_, y_) if dm or dn: original_out_shape = [x.value for x in output_tensor.shape] - out = relay.strided_slice(out_, - begin=[0, 0], - end=original_out_shape) + out = relay.strided_slice(out_, begin=[0, 0], end=original_out_shape) else: out = out_ return out diff --git a/tests/python/relay/test_pass_legalize_tensorcore.py b/tests/python/relay/test_pass_legalize_tensorcore.py index 0e44a0e17a4c..a1f8f18212da 100644 --- a/tests/python/relay/test_pass_legalize_tensorcore.py +++ b/tests/python/relay/test_pass_legalize_tensorcore.py @@ -45,7 +45,15 @@ def test_legalize_conv2d(data_shape, kernel_shape, pad_shape, do_pad=True): def before(): x = relay.var("x", shape=data_shape, dtype="float16") weight = relay.var("weight", shape=kernel_shape, dtype="float16") - y = relay.nn.conv2d(x, weight, channels=out_channel, kernel_size=(3, 3), padding=(1, 1), data_layout="NHWC", kernel_layout="HWIO") + y = relay.nn.conv2d( + x, + weight, + channels=out_channel, + kernel_size=(3, 3), + padding=(1, 1), + data_layout="NHWC", + kernel_layout="HWIO", + ) y = relay.Function([x, weight], y) return y @@ -69,7 +77,7 @@ def expected(): y_pad = relay.nn.conv2d( x_pad, weight=weight_pad, - channels=out_channel+do, + channels=out_channel + do, kernel_size=(3, 3), padding=(1, 1), data_layout="NHWC", From 54d3732c24456218e97ca8073f0932897e5f3dfd Mon Sep 17 00:00:00 2001 From: "liuxin.ai" Date: Mon, 25 Jan 2021 14:38:19 +0800 Subject: [PATCH 11/15] fix pylint --- python/tvm/topi/cuda/conv2d_alter_op.py | 4 ++-- python/tvm/topi/cuda/tensorcore_alter_op.py | 21 ++++++++------------- python/tvm/topi/nn/batch_matmul.py | 1 + python/tvm/topi/nn/dense.py | 1 + 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/python/tvm/topi/cuda/conv2d_alter_op.py b/python/tvm/topi/cuda/conv2d_alter_op.py index c9f0d90e3895..48d4246b83c2 100644 --- a/python/tvm/topi/cuda/conv2d_alter_op.py +++ b/python/tvm/topi/cuda/conv2d_alter_op.py @@ -364,10 +364,10 @@ def _conv2d_legalize(attrs, inputs, arg_types): (db, di, do), extra_flops = pad_to_tensorcore(batch, in_channel, out_channel) if extra_flops > 2: - logger.info("conv2d pad_to_tensorcore skipped, extra_flops %s" % extra_flops) + logger.info("conv2d pad_to_tensorcore skipped, extra_flops %s", extra_flops) return None - logger.info("conv2d pad_to_tensorcore, extra_flops %s" % extra_flops) + logger.info("conv2d pad_to_tensorcore, extra_flops %s", extra_flops) # Pad batch size if db != 0: diff --git a/python/tvm/topi/cuda/tensorcore_alter_op.py b/python/tvm/topi/cuda/tensorcore_alter_op.py index 0d57af3eef36..aec7acbfde56 100644 --- a/python/tvm/topi/cuda/tensorcore_alter_op.py +++ b/python/tvm/topi/cuda/tensorcore_alter_op.py @@ -18,14 +18,10 @@ """Tensorcore alter op and legalize functions for cuda backend""" import logging -import tvm -from tvm import te -from tvm import relay -from tvm import autotvm import math +from tvm import relay from .. import nn -from ..utils import get_const_tuple logger = logging.getLogger("topi") @@ -66,24 +62,22 @@ def _batch_matmul_legalize(attrs, inputs, arg_types): K = K.value N = N.value + # The shape of (M, K, N) must be multiple of (16, 16, 16) or (32, 16, 8) or (8, 16, 32) if ( (M % 8 == 0 and K % 16 == 0 and N % 32 == 0) or (M % 16 == 0 and K % 16 == 0 and N % 16 == 0) or (M % 32 == 0 and K % 16 == 0 and N % 8 == 0) ): - "The shape of (M, K, N) must be multiple of (16, 16, 16) or (32, 16, 8) or (8, 16, 32) for now" # no need to pad return None - # todo: 1. check the padding size 2. pad to 8*16*32/32*16*8 liuxin 2020/7/15 - (dm, dk, dn), extra_flops = pad_to_tensorcore(M, K, N) if extra_flops > 2: - logger.info("batch_matmul pad_to_tensorcore skipped, extra_flops %s" % extra_flops) + logger.info("batch_matmul pad_to_tensorcore skipped, extra_flops %s", extra_flops) return None - logger.info("batch_matmul pad_to_tensorcore, extra_flops %s" % extra_flops) + logger.info("batch_matmul pad_to_tensorcore, extra_flops %s", extra_flops) if dm or dk: x_ = relay.nn.pad(x, pad_width=((0, 0), (0, dm), (0, dk))) else: @@ -142,22 +136,22 @@ def _dense_legalize(attrs, inputs, arg_types): # todo: deal with unfixed shape when compiling wdl model return None + # The shape of (M, K, N) must be multiple of (16, 16, 16) or (32, 16, 8) or (8, 16, 32) if ( (M % 8 == 0 and K % 16 == 0 and N % 32 == 0) or (M % 16 == 0 and K % 16 == 0 and N % 16 == 0) or (M % 32 == 0 and K % 16 == 0 and N % 8 == 0) ): - "The shape of (M, K, N) must be multiple of (16, 16, 16) or (32, 16, 8) or (8, 16, 32) for now" # no need to pad return None (dm, dk, dn), extra_flops_ratio = pad_to_tensorcore(M, K, N) if extra_flops_ratio > 2: - logger.info("dense pad_to_tensorcore skipped, extra_flops_ratio %s" % extra_flops_ratio) + logger.info("dense pad_to_tensorcore skipped, extra_flops_ratio %s", extra_flops_ratio) return None - logger.info("dense pad_to_tensorcore, extra_flops_ratio %s" % extra_flops_ratio) + logger.info("dense pad_to_tensorcore, extra_flops_ratio %s", extra_flops_ratio) if dm or dk: x_ = relay.nn.pad(x, pad_width=((0, dm), (0, dk))) @@ -178,6 +172,7 @@ def _dense_legalize(attrs, inputs, arg_types): def pad_to_tensorcore(M, K, N): + """pad shape to enable tensorcore""" candidates = [(16, 16, 16), (32, 16, 8), (8, 16, 32)] flops = M * K * N diff --git a/python/tvm/topi/nn/batch_matmul.py b/python/tvm/topi/nn/batch_matmul.py index dccb103fabd5..accd2a8fc1b5 100644 --- a/python/tvm/topi/nn/batch_matmul.py +++ b/python/tvm/topi/nn/batch_matmul.py @@ -99,4 +99,5 @@ def batch_matmul_legalize(attrs, inputs, types): The legalized expr """ # not to change by default + #pylint: disable=unused-argument return None diff --git a/python/tvm/topi/nn/dense.py b/python/tvm/topi/nn/dense.py index ed58f51bee26..5cff8714ac23 100644 --- a/python/tvm/topi/nn/dense.py +++ b/python/tvm/topi/nn/dense.py @@ -102,4 +102,5 @@ def dense_legalize(attrs, inputs, types): The legalized expr """ # not to change by default + #pylint: disable=unused-argument return None From 01c469d35ec035d0ceff2d7d8c09255d65e7af38 Mon Sep 17 00:00:00 2001 From: "liuxin.ai" Date: Mon, 25 Jan 2021 15:27:53 +0800 Subject: [PATCH 12/15] code format --- python/tvm/topi/nn/batch_matmul.py | 2 +- python/tvm/topi/nn/dense.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tvm/topi/nn/batch_matmul.py b/python/tvm/topi/nn/batch_matmul.py index accd2a8fc1b5..9c5848129397 100644 --- a/python/tvm/topi/nn/batch_matmul.py +++ b/python/tvm/topi/nn/batch_matmul.py @@ -99,5 +99,5 @@ def batch_matmul_legalize(attrs, inputs, types): The legalized expr """ # not to change by default - #pylint: disable=unused-argument + # pylint: disable=unused-argument return None diff --git a/python/tvm/topi/nn/dense.py b/python/tvm/topi/nn/dense.py index 5cff8714ac23..bb6ea90c3fcd 100644 --- a/python/tvm/topi/nn/dense.py +++ b/python/tvm/topi/nn/dense.py @@ -102,5 +102,5 @@ def dense_legalize(attrs, inputs, types): The legalized expr """ # not to change by default - #pylint: disable=unused-argument + # pylint: disable=unused-argument return None From c15401f532adbae447fee9e1b7993c218b6a7361 Mon Sep 17 00:00:00 2001 From: "liuxin.ai" Date: Tue, 26 Jan 2021 11:28:24 +0800 Subject: [PATCH 13/15] use_gpu test only; fix conv2d_alter_op --- python/tvm/topi/cuda/conv2d_alter_op.py | 5 ++--- tests/python/relay/test_pass_legalize_tensorcore.py | 3 +++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/python/tvm/topi/cuda/conv2d_alter_op.py b/python/tvm/topi/cuda/conv2d_alter_op.py index 48d4246b83c2..65bf9d1f178d 100644 --- a/python/tvm/topi/cuda/conv2d_alter_op.py +++ b/python/tvm/topi/cuda/conv2d_alter_op.py @@ -385,9 +385,8 @@ def _conv2d_legalize(attrs, inputs, arg_types): if do != 0: new_out_channel = out_channel + do new_attrs["channels"] = new_out_channel - out = tvm.relay.nn.conv2d(data, kernel, **new_attrs) - else: - out = relay.nn.conv2d(data, kernel, **new_attrs) + + out = relay.nn.conv2d(data, kernel, **new_attrs) if db != 0 or do != 0: original_out_shape = [x.value for x in output_tensor.shape] diff --git a/tests/python/relay/test_pass_legalize_tensorcore.py b/tests/python/relay/test_pass_legalize_tensorcore.py index a1f8f18212da..4ee142260141 100644 --- a/tests/python/relay/test_pass_legalize_tensorcore.py +++ b/tests/python/relay/test_pass_legalize_tensorcore.py @@ -35,6 +35,7 @@ def run_opt_pass(expr, passes): return entry if isinstance(expr, relay.Function) else entry.body +@tvm.testing.uses_gpu def test_legalize_conv2d(data_shape, kernel_shape, pad_shape, do_pad=True): """test legalize conv2d to enable tensorcore""" out_channel = kernel_shape[3] @@ -97,6 +98,7 @@ def expected(): assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b) +@tvm.testing.uses_gpu def test_legalize_dense(data_shape, kernel_shape, pad_shape, do_pad=True): """test legalize dense to enable tensorcore""" M, K = data_shape @@ -146,6 +148,7 @@ def expected(): assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b) +@tvm.testing.uses_gpu def test_legalize_batch_matmul(data_shape, kernel_shape, pad_shape, do_pad=True): """test legalize dense to enable tensorcore""" B, M, _ = data_shape From 53fe5d84f61c1695a2ad0c8a593088358b4f935b Mon Sep 17 00:00:00 2001 From: "liuxin.ai" Date: Tue, 26 Jan 2021 12:35:19 +0800 Subject: [PATCH 14/15] fix tests params --- .../relay/test_pass_legalize_tensorcore.py | 373 +++++++++--------- 1 file changed, 191 insertions(+), 182 deletions(-) diff --git a/tests/python/relay/test_pass_legalize_tensorcore.py b/tests/python/relay/test_pass_legalize_tensorcore.py index 4ee142260141..5ecda4ba07a8 100644 --- a/tests/python/relay/test_pass_legalize_tensorcore.py +++ b/tests/python/relay/test_pass_legalize_tensorcore.py @@ -36,195 +36,204 @@ def run_opt_pass(expr, passes): @tvm.testing.uses_gpu -def test_legalize_conv2d(data_shape, kernel_shape, pad_shape, do_pad=True): +def test_legalize_conv2d(): """test legalize conv2d to enable tensorcore""" - out_channel = kernel_shape[3] - out_shape = list(data_shape) - out_shape[3] = out_channel - db, di, do = pad_shape - - def before(): - x = relay.var("x", shape=data_shape, dtype="float16") - weight = relay.var("weight", shape=kernel_shape, dtype="float16") - y = relay.nn.conv2d( - x, - weight, - channels=out_channel, - kernel_size=(3, 3), - padding=(1, 1), - data_layout="NHWC", - kernel_layout="HWIO", - ) - y = relay.Function([x, weight], y) - return y - - def legalize_conv2d(attrs, inputs, types): - with tvm.target.Target("cuda"): - return topi.nn.conv2d_legalize(attrs, inputs, types) - - def expected(): - if not do_pad: - return before() - x = relay.var("x", shape=data_shape, dtype="float16") - if db or di: - x_pad = relay.nn.pad(x, pad_width=((0, db), (0, 0), (0, 0), (0, di))) - else: - x_pad = x - weight = relay.var("weight", shape=(kernel_shape), dtype="float16") - if di or do: - weight_pad = relay.nn.pad(weight, pad_width=((0, 0), (0, 0), (0, di), (0, do))) - else: - weight_pad = weight - y_pad = relay.nn.conv2d( - x_pad, - weight=weight_pad, - channels=out_channel + do, - kernel_size=(3, 3), - padding=(1, 1), - data_layout="NHWC", - kernel_layout="HWIO", - ) - if db or do: - y = relay.strided_slice(y_pad, begin=[0, 0, 0, 0], end=out_shape) - else: - y = y_pad - y = relay.Function([x, weight], y) - return y - - with TempOpAttr("nn.conv2d", "FTVMLegalize", legalize_conv2d): - a = before() - a = run_opt_pass(a, transform.Legalize()) - b = run_opt_pass(expected(), transform.InferType()) - assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b) + + def _test_legalize_conv2d(data_shape, kernel_shape, pad_shape, do_pad=True): + out_channel = kernel_shape[3] + out_shape = list(data_shape) + out_shape[3] = out_channel + db, di, do = pad_shape + + def before(): + x = relay.var("x", shape=data_shape, dtype="float16") + weight = relay.var("weight", shape=kernel_shape, dtype="float16") + y = relay.nn.conv2d( + x, + weight, + channels=out_channel, + kernel_size=(3, 3), + padding=(1, 1), + data_layout="NHWC", + kernel_layout="HWIO", + ) + y = relay.Function([x, weight], y) + return y + + def legalize_conv2d(attrs, inputs, types): + with tvm.target.Target("cuda"): + return topi.nn.conv2d_legalize(attrs, inputs, types) + + def expected(): + if not do_pad: + return before() + x = relay.var("x", shape=data_shape, dtype="float16") + if db or di: + x_pad = relay.nn.pad(x, pad_width=((0, db), (0, 0), (0, 0), (0, di))) + else: + x_pad = x + weight = relay.var("weight", shape=(kernel_shape), dtype="float16") + if di or do: + weight_pad = relay.nn.pad(weight, pad_width=((0, 0), (0, 0), (0, di), (0, do))) + else: + weight_pad = weight + y_pad = relay.nn.conv2d( + x_pad, + weight=weight_pad, + channels=out_channel + do, + kernel_size=(3, 3), + padding=(1, 1), + data_layout="NHWC", + kernel_layout="HWIO", + ) + if db or do: + y = relay.strided_slice(y_pad, begin=[0, 0, 0, 0], end=out_shape) + else: + y = y_pad + y = relay.Function([x, weight], y) + return y + + with TempOpAttr("nn.conv2d", "FTVMLegalize", legalize_conv2d): + a = before() + a = run_opt_pass(a, transform.Legalize()) + b = run_opt_pass(expected(), transform.InferType()) + assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b) + + # conv2d pad batch + _test_legalize_conv2d((7, 16, 16, 64), (3, 3, 64, 64), (1, 0, 0)) + _test_legalize_conv2d((3, 16, 16, 64), (3, 3, 64, 64), (5, 0, 0)) + _test_legalize_conv2d((2, 16, 16, 64), (3, 3, 64, 64), (0, 0, 0), False) + # conv2d pad in_channel + _test_legalize_conv2d((8, 16, 16, 63), (3, 3, 63, 64), (0, 1, 0)) + _test_legalize_conv2d((8, 16, 16, 33), (3, 3, 33, 64), (0, 15, 0)) + _test_legalize_conv2d((8, 16, 16, 13), (3, 3, 13, 64), (0, 3, 0)) + _test_legalize_conv2d((8, 16, 16, 1), (3, 3, 1, 64), (0, 0, 0), False) + # conv2d pad out_channel + _test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 63), (0, 0, 1)) + _test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 33), (0, 0, 31)) + _test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 1), (0, 0, 0), False) @tvm.testing.uses_gpu -def test_legalize_dense(data_shape, kernel_shape, pad_shape, do_pad=True): - """test legalize dense to enable tensorcore""" - M, K = data_shape - N, _ = kernel_shape - out_shape = (M, N) - dm, dk, dn = pad_shape - - def before(): - x = relay.var("x", shape=data_shape, dtype="float16") - weight = relay.var("weight", shape=kernel_shape, dtype="float16") - y = relay.nn.dense(x, weight) - y = relay.Function([x, weight], y) - return y - - def legalize_dense(attrs, inputs, types): - with tvm.target.Target("cuda"): - return topi.nn.dense_legalize(attrs, inputs, types) - - def expected(): - if not do_pad: - return before() - x = relay.var("x", shape=data_shape, dtype="float16") - if dm or dk: - x_pad = relay.nn.pad(x, pad_width=((0, dm), (0, dk))) - else: - x_pad = x - weight = relay.var("weight", shape=(kernel_shape), dtype="float16") - if dn or dk: - weight_pad = relay.nn.pad(weight, pad_width=((0, dn), (0, dk))) - else: - weight_pad = weight - y_pad = relay.nn.dense( - x_pad, - weight_pad, - ) - if dm or dn: - y = relay.strided_slice(y_pad, begin=[0, 0], end=out_shape) - else: - y = y_pad - y = relay.Function([x, weight], y) - return y - - with TempOpAttr("nn.dense", "FTVMLegalize", legalize_dense): - a = before() - a = run_opt_pass(a, transform.Legalize()) - b = run_opt_pass(expected(), transform.InferType()) - assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b) +def test_legalize_dense(): + def _test_legalize_dense(data_shape, kernel_shape, pad_shape, do_pad=True): + """test legalize dense to enable tensorcore""" + M, K = data_shape + N, _ = kernel_shape + out_shape = (M, N) + dm, dk, dn = pad_shape + + def before(): + x = relay.var("x", shape=data_shape, dtype="float16") + weight = relay.var("weight", shape=kernel_shape, dtype="float16") + y = relay.nn.dense(x, weight) + y = relay.Function([x, weight], y) + return y + + def legalize_dense(attrs, inputs, types): + with tvm.target.Target("cuda"): + return topi.nn.dense_legalize(attrs, inputs, types) + + def expected(): + if not do_pad: + return before() + x = relay.var("x", shape=data_shape, dtype="float16") + if dm or dk: + x_pad = relay.nn.pad(x, pad_width=((0, dm), (0, dk))) + else: + x_pad = x + weight = relay.var("weight", shape=(kernel_shape), dtype="float16") + if dn or dk: + weight_pad = relay.nn.pad(weight, pad_width=((0, dn), (0, dk))) + else: + weight_pad = weight + y_pad = relay.nn.dense( + x_pad, + weight_pad, + ) + if dm or dn: + y = relay.strided_slice(y_pad, begin=[0, 0], end=out_shape) + else: + y = y_pad + y = relay.Function([x, weight], y) + return y + + with TempOpAttr("nn.dense", "FTVMLegalize", legalize_dense): + a = before() + a = run_opt_pass(a, transform.Legalize()) + b = run_opt_pass(expected(), transform.InferType()) + assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b) + + # dense + _test_legalize_dense((8, 16), (32, 16), (0, 0, 0), False) + _test_legalize_dense((7, 16), (32, 16), (1, 0, 0)) + _test_legalize_dense((8, 15), (32, 15), (0, 1, 0)) + _test_legalize_dense((8, 16), (31, 16), (0, 0, 1)) + _test_legalize_dense((7, 15), (31, 15), (1, 1, 1)) + _test_legalize_dense((3, 16), (32, 16), (5, 0, 0)) + _test_legalize_dense((2, 16), (32, 16), (0, 0, 0), False) @tvm.testing.uses_gpu -def test_legalize_batch_matmul(data_shape, kernel_shape, pad_shape, do_pad=True): - """test legalize dense to enable tensorcore""" - B, M, _ = data_shape - _, N, _ = kernel_shape - out_shape = (B, M, N) - dm, dk, dn = pad_shape - - def before(): - x = relay.var("x", shape=data_shape, dtype="float16") - weight = relay.var("weight", shape=kernel_shape, dtype="float16") - y = relay.nn.batch_matmul(x, weight) - y = relay.Function([x, weight], y) - return y - - def legalize_batch_matmul(attrs, inputs, types): - with tvm.target.Target("cuda"): - return topi.nn.batch_matmul_legalize(attrs, inputs, types) - - def expected(): - if not do_pad: - return before() - x = relay.var("x", shape=data_shape, dtype="float16") - if dm or dk: - x_pad = relay.nn.pad(x, pad_width=((0, 0), (0, dm), (0, dk))) - else: - x_pad = x - weight = relay.var("weight", shape=(kernel_shape), dtype="float16") - if dn or dk: - weight_pad = relay.nn.pad(weight, pad_width=((0, 0), (0, dn), (0, dk))) - else: - weight_pad = weight - y_pad = relay.nn.batch_matmul( - x_pad, - weight_pad, - ) - if dm or dn: - y = relay.strided_slice(y_pad, begin=[0, 0, 0], end=out_shape) - else: - y = y_pad - y = relay.Function([x, weight], y) - return y - - with TempOpAttr("nn.batch_matmul", "FTVMLegalize", legalize_batch_matmul): - a = before() - a = run_opt_pass(a, transform.Legalize()) - b = run_opt_pass(expected(), transform.InferType()) - assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b) +def test_legalize_batch_matmul(): + def _test_legalize_batch_matmul(data_shape, kernel_shape, pad_shape, do_pad=True): + """test legalize dense to enable tensorcore""" + B, M, _ = data_shape + _, N, _ = kernel_shape + out_shape = (B, M, N) + dm, dk, dn = pad_shape + + def before(): + x = relay.var("x", shape=data_shape, dtype="float16") + weight = relay.var("weight", shape=kernel_shape, dtype="float16") + y = relay.nn.batch_matmul(x, weight) + y = relay.Function([x, weight], y) + return y + + def legalize_batch_matmul(attrs, inputs, types): + with tvm.target.Target("cuda"): + return topi.nn.batch_matmul_legalize(attrs, inputs, types) + + def expected(): + if not do_pad: + return before() + x = relay.var("x", shape=data_shape, dtype="float16") + if dm or dk: + x_pad = relay.nn.pad(x, pad_width=((0, 0), (0, dm), (0, dk))) + else: + x_pad = x + weight = relay.var("weight", shape=(kernel_shape), dtype="float16") + if dn or dk: + weight_pad = relay.nn.pad(weight, pad_width=((0, 0), (0, dn), (0, dk))) + else: + weight_pad = weight + y_pad = relay.nn.batch_matmul( + x_pad, + weight_pad, + ) + if dm or dn: + y = relay.strided_slice(y_pad, begin=[0, 0, 0], end=out_shape) + else: + y = y_pad + y = relay.Function([x, weight], y) + return y + + with TempOpAttr("nn.batch_matmul", "FTVMLegalize", legalize_batch_matmul): + a = before() + a = run_opt_pass(a, transform.Legalize()) + b = run_opt_pass(expected(), transform.InferType()) + assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b) + + _test_legalize_batch_matmul((16, 8, 16), (16, 32, 16), (0, 0, 0), False) + _test_legalize_batch_matmul((16, 7, 16), (16, 32, 16), (1, 0, 0)) + _test_legalize_batch_matmul((16, 8, 15), (16, 32, 15), (0, 1, 0)) + _test_legalize_batch_matmul((16, 8, 16), (16, 31, 16), (0, 0, 1)) + _test_legalize_batch_matmul((16, 7, 15), (16, 31, 15), (1, 1, 1)) + _test_legalize_batch_matmul((16, 3, 16), (16, 32, 16), (5, 0, 0)) + _test_legalize_batch_matmul((16, 2, 16), (16, 32, 16), (0, 0, 0), False) if __name__ == "__main__": - # conv2d pad batch - test_legalize_conv2d((7, 16, 16, 64), (3, 3, 64, 64), (1, 0, 0)) - test_legalize_conv2d((3, 16, 16, 64), (3, 3, 64, 64), (5, 0, 0)) - test_legalize_conv2d((2, 16, 16, 64), (3, 3, 64, 64), (0, 0, 0), False) - # conv2d pad in_channel - test_legalize_conv2d((8, 16, 16, 63), (3, 3, 63, 64), (0, 1, 0)) - test_legalize_conv2d((8, 16, 16, 33), (3, 3, 33, 64), (0, 15, 0)) - test_legalize_conv2d((8, 16, 16, 13), (3, 3, 13, 64), (0, 3, 0)) - test_legalize_conv2d((8, 16, 16, 1), (3, 3, 1, 64), (0, 0, 0), False) - # conv2d pad out_channel - test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 63), (0, 0, 1)) - test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 33), (0, 0, 31)) - test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 1), (0, 0, 0), False) - # dense - test_legalize_dense((8, 16), (32, 16), (0, 0, 0), False) - test_legalize_dense((7, 16), (32, 16), (1, 0, 0)) - test_legalize_dense((8, 15), (32, 15), (0, 1, 0)) - test_legalize_dense((8, 16), (31, 16), (0, 0, 1)) - test_legalize_dense((7, 15), (31, 15), (1, 1, 1)) - test_legalize_dense((3, 16), (32, 16), (5, 0, 0)) - test_legalize_dense((2, 16), (32, 16), (0, 0, 0), False) - # batch_matmul - test_legalize_batch_matmul((16, 8, 16), (16, 32, 16), (0, 0, 0), False) - test_legalize_batch_matmul((16, 7, 16), (16, 32, 16), (1, 0, 0)) - test_legalize_batch_matmul((16, 8, 15), (16, 32, 15), (0, 1, 0)) - test_legalize_batch_matmul((16, 8, 16), (16, 31, 16), (0, 0, 1)) - test_legalize_batch_matmul((16, 7, 15), (16, 31, 15), (1, 1, 1)) - test_legalize_batch_matmul((16, 3, 16), (16, 32, 16), (5, 0, 0)) - test_legalize_batch_matmul((16, 2, 16), (16, 32, 16), (0, 0, 0), False) + test_legalize_conv2d() + test_legalize_dense() + test_legalize_batch_matmul() From e1f3debf5cb2f23210bab19655b4051bd740aafd Mon Sep 17 00:00:00 2001 From: "liuxin.ai" Date: Tue, 26 Jan 2021 14:09:26 +0800 Subject: [PATCH 15/15] revert transform fix --- python/tvm/relay/op/transform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index 9c309838d414..7e7f9b299593 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -21,7 +21,7 @@ from . import _make from .dyn import _make as _dyn_make from .tensor import shape_of -from ..expr import TupleWrapper, const, Expr, Tuple, Constant +from ..expr import TupleWrapper, const, Expr, Tuple from ...tir import expr as _expr @@ -884,7 +884,7 @@ def strided_slice(data, begin, end, strides=None, slice_mode="end"): The computed result. """ strides = strides or [1] - if any([(isinstance(i, Expr) and not isinstance(i, Constant)) for i in (begin, end, strides)]): + if isinstance(begin, Expr) or isinstance(end, Expr) or isinstance(strides, Expr): if isinstance(begin, (tuple, list)): begin = const(list(begin)) if isinstance(end, (tuple, list)):