diff --git a/python/tvm/topi/arm_cpu/conv2d.py b/python/tvm/topi/arm_cpu/conv2d.py
index 24f0fb101016..a478818084d5 100644
--- a/python/tvm/topi/arm_cpu/conv2d.py
+++ b/python/tvm/topi/arm_cpu/conv2d.py
@@ -33,10 +33,7 @@
     schedule_conv2d_spatial_pack_nchw,
     schedule_conv2d_spatial_pack_nhwc,
 )
-from .mprofile.dsp.conv2d import (
-    conv2d_nhwc_dsp_compute,
-    conv2d_nhwc_dsp_schedule,
-)
+from .mprofile.dsp.conv2d import conv2d_nhwc_dsp_compute, conv2d_nhwc_dsp_schedule
 
 
 @autotvm.register_topi_compute("conv2d_nchw_spatial_pack.arm_cpu")
@@ -267,13 +264,7 @@ def _schedule_winograd(cfg, s, output, last):
     if isinstance(U.op, tvm.te.ComputeOp):
         kernel, G = U.op.input_tensors
         s[G].compute_inline()
-        (
-            eps,
-            nu,
-            k,
-            c,
-            kk,
-        ) = s[U].op.axis
+        (eps, nu, k, c, kk) = s[U].op.axis
         if autotvm.GLOBAL_SCOPE.in_tuning:
             # kernel transformation will be pre-computed during compilation, so we skip
             # this part to make tuning records correct
diff --git a/python/tvm/topi/arm_cpu/conv2d_alter_op.py b/python/tvm/topi/arm_cpu/conv2d_alter_op.py
index d9379bd77272..b0fdb99cbe33 100644
--- a/python/tvm/topi/arm_cpu/conv2d_alter_op.py
+++ b/python/tvm/topi/arm_cpu/conv2d_alter_op.py
@@ -152,9 +152,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
         )
         dispatch_ctx.update(target, new_workload, cfg)
         return relay.nn.conv2d(
-            inputs[0],
-            relay.Constant(tvm.nd.array(reshaped_new_kernel)),
-            **new_attrs,
+            inputs[0], relay.Constant(tvm.nd.array(reshaped_new_kernel)), **new_attrs
         )
 
     # Only microTVM does layout alteration for NHWC layout with real data types
diff --git a/python/tvm/topi/cuda/dense.py b/python/tvm/topi/cuda/dense.py
index 9fd53ba1fee8..fa2c4a0f9d6d 100644
--- a/python/tvm/topi/cuda/dense.py
+++ b/python/tvm/topi/cuda/dense.py
@@ -29,13 +29,7 @@
 
 
 def _matmul_cublas_common(
-    cfg,
-    tensor_a,
-    tensor_b,
-    bias=None,
-    out_dtype=None,
-    transpose_a=False,
-    transpose_b=False,
+    cfg, tensor_a, tensor_b, bias=None, out_dtype=None, transpose_a=False, transpose_b=False
 ):
     assert len(tensor_a.shape) == 2 and len(tensor_b.shape) == 2, "only support 2-dim matmul"
     if bias is not None:
@@ -58,13 +52,7 @@ def _matmul_cublas_common(
 
 @autotvm.register_topi_compute("matmul_cublas.cuda")
 def matmul_cublas(
-    cfg,
-    tensor_a,
-    tensor_b,
-    bias=None,
-    out_dtype=None,
-    transpose_a=False,
-    transpose_b=False,
+    cfg, tensor_a, tensor_b, bias=None, out_dtype=None, transpose_a=False, transpose_b=False
 ):
     """Matmul operator on CUDA with CUBLAS"""
     return _matmul_cublas_common(cfg, tensor_a, tensor_b, bias, out_dtype, transpose_a, transpose_b)
diff --git a/python/tvm/topi/cuda/sparse.py b/python/tvm/topi/cuda/sparse.py
index 77ed8435acb6..921075601e5a 100644
--- a/python/tvm/topi/cuda/sparse.py
+++ b/python/tvm/topi/cuda/sparse.py
@@ -159,9 +159,10 @@ def gen_ir(data, w_data, w_indices, w_indptr, out):
         bs_m = bs_n
         mb = m // bs_m
         mi = warp_size
-        assert (
-            mb >= mi
-        ), f"Number of block rows in dense matrix must be larger than warp size: {warp_size} vs {mb}."
+        assert mb >= mi, (
+            f"Number of block rows in dense matrix must be larger than warp size: "
+            f"{warp_size} vs {mb}."
+        )
         mo = ceil_div(mb, mi)
         ni = 1  # TODO(tkonolige): how do I compute the number of warps per block?
         no = ceil_div(nb, ni)
diff --git a/python/tvm/topi/generic/conv2d.py b/python/tvm/topi/generic/conv2d.py
index 83a1c4df3658..189bdf9cbd7c 100644
--- a/python/tvm/topi/generic/conv2d.py
+++ b/python/tvm/topi/generic/conv2d.py
@@ -45,8 +45,12 @@ def fallback_schedule_cpu_common_int8(cfg, wkl, int32_lanes, num_int8_elements):
     dilated_kernel_w = (wkl.kernel_w - 1) * wkl.dilation_w + 1
     out_width = (wkl.width + pl + pr - dilated_kernel_w) // WSTR + 1
 
-    assert wkl.out_filter % int32_lanes == 0, f"wkl.out_filter={wkl.out_filter}, int32_lanes={int32_lanes}"
-    assert wkl.in_filter % num_int8_elements == 0, f"wkl.in_filter={wkl.in_filter}, num_int8_elements={num_int8_elements}"
+    assert (
+        wkl.out_filter % int32_lanes == 0
+    ), f"wkl.out_filter={wkl.out_filter}, int32_lanes={int32_lanes}"
+    assert (
+        wkl.in_filter % num_int8_elements == 0
+    ), f"wkl.in_filter={wkl.in_filter}, num_int8_elements={num_int8_elements}"
 
     oc_bn = int32_lanes if int32_lanes >= num_int8_elements else num_int8_elements
     ic_bn = 1
@@ -87,8 +91,12 @@ def fallback_schedule_cpu_1x1_int8(cfg, wkl, int32_lanes, num_int8_elements):
     out_height = (wkl.height + pt + pb - wkl.kernel_h) // HSTR + 1
     out_width = (wkl.width + pl + pr - wkl.kernel_w) // WSTR + 1
 
-    assert wkl.out_filter % int32_lanes == 0, f"wkl.out_filter={wkl.out_filter}, int32_lanes={int32_lanes}"
-    assert wkl.in_filter % num_int8_elements == 0, f"wkl.in_filter={wkl.in_filter}, num_int8_elements={num_int8_elements}"
+    assert (
+        wkl.out_filter % int32_lanes == 0
+    ), f"wkl.out_filter={wkl.out_filter}, int32_lanes={int32_lanes}"
+    assert (
+        wkl.in_filter % num_int8_elements == 0
+    ), f"wkl.in_filter={wkl.in_filter}, num_int8_elements={num_int8_elements}"
 
     oc_bn = int32_lanes if int32_lanes >= num_int8_elements else num_int8_elements
     ic_bn = 1
diff --git a/python/tvm/topi/image/resize.py b/python/tvm/topi/image/resize.py
index c100459d9c8a..29ed03f62e74 100644
--- a/python/tvm/topi/image/resize.py
+++ b/python/tvm/topi/image/resize.py
@@ -314,14 +314,7 @@ def _cast_output(value, data_dtype="float32", out_dtype=None):
     if boxes is not None:
         # TODO(mbrookhart): Find an example of this
         raise NotImplementedError("resize1d with image boxes not yet implemented")
-    in_x = get_inx(
-        x,
-        image_width,
-        target_width,
-        coordinate_transformation_mode,
-        roi[0],
-        roi[1],
-    )
+    in_x = get_inx(x, image_width, target_width, coordinate_transformation_mode, roi[0], roi[1])
 
     if method == "nearest_neighbor":
         if rounding_method == "":
@@ -332,17 +325,7 @@ def _cast_output(value, data_dtype="float32", out_dtype=None):
 
         closest_x_index = get_closest_index(in_x, rounding_method, boxes)
 
-        value = get_1d_pixel(
-            data,
-            layout,
-            image_width,
-            box_idx,
-            c,
-            closest_x_index,
-            cc,
-            inum,
-            ic,
-        )
+        value = get_1d_pixel(data, layout, image_width, box_idx, c, closest_x_index, cc, inum, ic)
     elif method == "linear":
         x_int = te.floor(in_x).astype("int32")
@@ -350,17 +333,7 @@ def _cast_output(value, data_dtype="float32", out_dtype=None):
 
         p = [0 for i in range(2)]
         for i in range(2):
-            p[i] = get_1d_pixel(
-                data,
-                layout,
-                image_width,
-                box_idx,
-                c,
-                x_int + i,
-                cc,
-                inum,
-                ic,
-            )
+            p[i] = get_1d_pixel(data, layout, image_width, box_idx, c, x_int + i, cc, inum, ic)
 
         value = _lerp(*p, x_lerp)
@@ -371,17 +344,7 @@ def _cast_output(value, data_dtype="float32", out_dtype=None):
         # Get the surrounding values
         p = [0 for i in range(4)]
         for i in range(4):
-            p[i] = get_1d_pixel(
-                data,
-                layout,
-                image_width,
-                box_idx,
-                c,
-                xint + i - 1,
-                cc,
-                inum,
-                ic,
-            )
+            p[i] = get_1d_pixel(data, layout, image_width, box_idx, c, xint + i - 1, cc, inum, ic)
 
         wx = _cubic_spline_weights(xfract, alpha)
         if exclude_outside:
diff --git a/python/tvm/topi/nn/conv2d.py b/python/tvm/topi/nn/conv2d.py
index ced0615b4f75..f70d749e0f3c 100644
--- a/python/tvm/topi/nn/conv2d.py
+++ b/python/tvm/topi/nn/conv2d.py
@@ -1224,8 +1224,7 @@ def _conv2d_winograd_nhwc_impl(
     kernel_pack = te.compute(
         (alpha, alpha, CO, CI),
         lambda eps, nu, co, ci: te.sum(
-            weight[r_kh, r_kw, ci, co] * G[eps, r_kh] * G[nu, r_kw],
-            axis=[r_kh, r_kw],
+            weight[r_kh, r_kw, ci, co] * G[eps, r_kh] * G[nu, r_kw], axis=[r_kh, r_kw]
         ),
         name="kernel_pack",
     )
@@ -1243,10 +1242,7 @@ def _conv2d_winograd_nhwc_impl(
     input_tile = te.compute(
         (alpha, alpha, P, CI),
         lambda eps, nu, p, ci: data_pad[
-            p // (nH * nW),
-            ((p // nW) % nH) * m + eps,
-            (p % nW) * m + nu,
-            ci,
+            p // (nH * nW), ((p // nW) % nH) * m + eps, (p % nW) * m + nu, ci
         ],
         name="input_tile",
         attrs={"schedule_rule": "None"},
@@ -1258,8 +1254,7 @@ def _conv2d_winograd_nhwc_impl(
     data_pack = te.compute(
         (alpha, alpha, P, CI),
         lambda eps, nu, p, ci: te.sum(
-            input_tile[r_a, r_b, p, ci] * B[r_a, eps] * B[r_b, nu],
-            axis=[r_a, r_b],
+            input_tile[r_a, r_b, p, ci] * B[r_a, eps] * B[r_b, nu], axis=[r_a, r_b]
         ),
         name="data_pack",
         attrs={
@@ -1273,8 +1268,7 @@ def _conv2d_winograd_nhwc_impl(
     bgemm = te.compute(
         (alpha, alpha, P, CO),
         lambda eps, nu, p, co: te.sum(
-            data_pack[eps, nu, p, ci] * kernel_pack[eps, nu, co, ci],
-            axis=[ci],
+            data_pack[eps, nu, p, ci] * kernel_pack[eps, nu, co, ci], axis=[ci]
         ),
         name="bgemm",
         attrs=bgemm_attrs,
@@ -1290,8 +1284,7 @@ def _conv2d_winograd_nhwc_impl(
     inverse = te.compute(
         (m, m, P, CO),
         lambda vh, vw, p, co: te.sum(
-            bgemm[r_a, r_b, p, co] * A[r_a, vh] * A[r_b, vw],
-            axis=[r_a, r_b],
+            bgemm[r_a, r_b, p, co] * A[r_a, vh] * A[r_b, vw], axis=[r_a, r_b]
         ),
         name="inverse",
         attrs={
@@ -1303,12 +1296,7 @@ def _conv2d_winograd_nhwc_impl(
     # output
     output = te.compute(
         (N, H, W, CO),
-        lambda n, h, w, co: inverse[
-            h % m,
-            w % m,
-            n * nH * nW + (h // m) * nW + (w // m),
-            co,
-        ],
+        lambda n, h, w, co: inverse[h % m, w % m, n * nH * nW + (h // m) * nW + (w // m), co],
         name="conv2d_winograd",
     )
@@ -1358,12 +1346,7 @@ def _conv2d_winograd_nchw_impl(
     assert HSTR == 1 and WSTR == 1 and KH == 3 and KW == 3
 
     pt, pl, pb, pr = get_pad_tuple(padding, (KH, KW))
-    data_pad = pad(
-        data,
-        (0, 0, pt, pl),
-        (0, 0, pb, pr),
-        name="data_pad",
-    )
+    data_pad = pad(data, (0, 0, pt, pl), (0, 0, pb, pr), name="data_pad")
 
     r = KW
     m = tile_size
@@ -1382,8 +1365,7 @@ def _conv2d_winograd_nchw_impl(
     kernel_pack = te.compute(
         (alpha, alpha, CI, CO),
         lambda eps, nu, ci, co: te.sum(
-            weight[co, ci, r_kh, r_kw] * G[eps, r_kh] * G[nu, r_kw],
-            axis=[r_kh, r_kw],
+            weight[co, ci, r_kh, r_kw] * G[eps, r_kh] * G[nu, r_kw], axis=[r_kh, r_kw]
         ),
         name="kernel_pack",
     )
@@ -1401,10 +1383,7 @@ def _conv2d_winograd_nchw_impl(
     input_tile = te.compute(
         (CI, P, alpha, alpha),
         lambda ci, p, eps, nu: data_pad[
-            p // (nH * nW),
-            ci,
-            ((p // nW) % nH) * m + eps,
-            (p % nW) * m + nu,
+            p // (nH * nW), ci, ((p // nW) % nH) * m + eps, (p % nW) * m + nu
         ],
         name="input_tile",
         attrs={"schedule_rule": "None"},
@@ -1416,13 +1395,10 @@ def _conv2d_winograd_nchw_impl(
     data_pack = te.compute(
         (alpha, alpha, CI, P),
         lambda eps, nu, ci, p: te.sum(
-            input_tile[ci, p, r_a, r_b] * B[r_a, eps] * B[r_b, nu],
-            axis=[r_a, r_b],
+            input_tile[ci, p, r_a, r_b] * B[r_a, eps] * B[r_b, nu], axis=[r_a, r_b]
         ),
         name="data_pack",
-        attrs={
-            "schedule_rule": "conv2d_nchw_winograd_data_pack",
-        },
+        attrs={"schedule_rule": "conv2d_nchw_winograd_data_pack"},
     )
 
     # do batch gemm
@@ -1430,8 +1406,7 @@ def _conv2d_winograd_nchw_impl(
     bgemm = te.compute(
         (alpha, alpha, CO, P),
         lambda eps, nu, co, p: te.sum(
-            data_pack[eps, nu, ci, p] * kernel_pack[eps, nu, ci, co],
-            axis=[ci],
+            data_pack[eps, nu, ci, p] * kernel_pack[eps, nu, ci, co], axis=[ci]
         ),
         name="bgemm",
         attrs=bgemm_attrs,
@@ -1443,24 +1418,16 @@ def _conv2d_winograd_nchw_impl(
     inverse = te.compute(
         (CO, P, m, m),
         lambda co, p, vh, vw: te.sum(
-            bgemm[r_a, r_b, co, p] * A[r_a, vh] * A[r_b, vw],
-            axis=[r_a, r_b],
+            bgemm[r_a, r_b, co, p] * A[r_a, vh] * A[r_b, vw], axis=[r_a, r_b]
         ),
         name="inverse",
-        attrs={
-            "schedule_rule": "conv2d_nchw_winograd_inverse",
-        },
+        attrs={"schedule_rule": "conv2d_nchw_winograd_inverse"},
     )
 
     # output
     output = te.compute(
         (N, CO, H, W),
-        lambda n, co, h, w: inverse[
-            co,
-            n * nH * nW + (h // m) * nW + (w // m),
-            h % m,
-            w % m,
-        ],
+        lambda n, co, h, w: inverse[co, n * nH * nW + (h // m) * nW + (w // m), h % m, w % m],
         name="conv2d_winograd",
     )
diff --git a/python/tvm/topi/nn/depthwise_conv2d.py b/python/tvm/topi/nn/depthwise_conv2d.py
index 6553b5970e20..ad1e4a55177f 100644
--- a/python/tvm/topi/nn/depthwise_conv2d.py
+++ b/python/tvm/topi/nn/depthwise_conv2d.py
@@ -65,27 +65,24 @@ def _get_workload(data, kernel, stride, padding, dilation, out_dtype, data_layou
     elif data_layout == "NCHWc":
         _, in_channel_chunk, height, width, in_channel_block = get_const_tuple(data.shape)
         in_channel = in_channel_chunk * in_channel_block
-        (
-            filter_channel_chunk,
-            cm_chunk,
-            kh,
-            kw,
-            cm_block,
-            filter_channel_block,
-        ) = get_const_tuple(kernel.shape)
+        (filter_channel_chunk, cm_chunk, kh, kw, cm_block, filter_channel_block) = get_const_tuple(
+            kernel.shape
+        )
         filter_channel = filter_channel_chunk * filter_channel_block
         channel_multiplier = cm_chunk * cm_block
 
-        assert (
-            in_channel_block == filter_channel_block
-        ), f"Incorrect dimensions, data has block size {in_channel_block}, but filter has block size {filter_channel_block}"
+        assert in_channel_block == filter_channel_block, (
+            f"Incorrect dimensions, data has block size {in_channel_block}, but filter has "
+            f"block size {filter_channel_block}"
+        )
 
     else:
         raise ValueError(f"Data layout {data_layout} not supported")
 
-    assert (
-        in_channel == filter_channel
-    ), f"Incorrect dimensions, data has {in_channel} channels but filter expects {filter_channel} channels"
+    assert in_channel == filter_channel, (
+        f"Incorrect dimensions, data has {in_channel} channels but filter expects "
+        f"{filter_channel} channels"
+    )
 
     out_channel = filter_channel * channel_multiplier
     dilation_h, dilation_w = (
diff --git a/python/tvm/topi/nn/utils.py b/python/tvm/topi/nn/utils.py
index 59eac1925da8..ce4038ccb601 100644
--- a/python/tvm/topi/nn/utils.py
+++ b/python/tvm/topi/nn/utils.py
@@ -194,9 +194,10 @@ def get_pad_tuple_generic(padding, kernel):
         if len(padding) == len(kernel):
             pad_dimensions = [p * 2 for p in padding]
         elif len(padding) == len(kernel) * 2:
-            return [padding[i] for i in range(len(kernel))], [
-                padding[len(kernel) + i] for i in range(len(kernel))
-            ]
+            return (
+                [padding[i] for i in range(len(kernel))],
+                [padding[len(kernel) + i] for i in range(len(kernel))],
+            )
         else:
             raise ValueError("Size of padding can only be len(kernel) or len(kernel) * 2")
     elif isinstance(padding, int):
diff --git a/python/tvm/topi/testing/poolnd_python.py b/python/tvm/topi/testing/poolnd_python.py
index aec9e82023c1..486c265a02d8 100644
--- a/python/tvm/topi/testing/poolnd_python.py
+++ b/python/tvm/topi/testing/poolnd_python.py
@@ -38,10 +38,7 @@ def _get_supported_layout(dims: int):
     return "NCDHW"
 
 
-def _convert_to_layout(
-    input_tensor: np.ndarray,
-    layout: str,
-) -> np.ndarray:
+def _convert_to_layout(input_tensor: np.ndarray, layout: str) -> np.ndarray:
     """
     Converts back to original layout after the algorithm is finished
     """
@@ -55,10 +52,7 @@ def _convert_to_layout(
     return input_tensor
 
 
-def _convert_from_layout(
-    input_tensor: np.ndarray,
-    layout: str,
-) -> np.ndarray:
+def _convert_from_layout(input_tensor: np.ndarray, layout: str) -> np.ndarray:
     """
     Converts tensor to one of suppored layouts
     """
diff --git a/python/tvm/topi/transform.py b/python/tvm/topi/transform.py
index 0f66549a3791..934470fe23a3 100644
--- a/python/tvm/topi/transform.py
+++ b/python/tvm/topi/transform.py
@@ -85,7 +85,8 @@ def expand_like(a, shape_like, axis):
             # A special case: `a` is a scalar represented as a 1-dim tensor
             return te.compute(shape_like.shape, lambda *idxs: a(0))
         raise ValueError(
-            f"shape inconsistent when expand_like ({len(axis)}, {len(a.shape)}, {len(shape_like.shape)})"
+            f"shape inconsistent when expand_like ({len(axis)}, "
+            f"{len(a.shape)}, {len(shape_like.shape)})"
         )
 
     real_axis = topi.reduction._get_real_axis(len(shape_like.shape), axis)
diff --git a/python/tvm/topi/x86/conv3d.py b/python/tvm/topi/x86/conv3d.py
index 0ce02a0d32a0..20f2c4ac128c 100644
--- a/python/tvm/topi/x86/conv3d.py
+++ b/python/tvm/topi/x86/conv3d.py
@@ -277,15 +277,7 @@ def _conv3d_ndhwc(cfg, data, kernel, strides, padding, dilation, groups, out_dty
         ci_tile += 1
 
     # pack kernel
-    shape = (
-        num_filter // oc_bn,
-        ci_tile,
-        kernel_depth,
-        kernel_height,
-        kernel_width,
-        ic_bn,
-        oc_bn,
-    )
+    shape = (num_filter // oc_bn, ci_tile, kernel_depth, kernel_height, kernel_width, ic_bn, oc_bn)
     kernel_vec = te.compute(
         shape,
         lambda CO, CI, d, h, w, ci, co: kernel[d, h, w, CI * ic_bn + ci, CO * oc_bn + co],
@@ -398,15 +390,7 @@ def _conv3d_ncdhw(cfg, data, kernel, strides, padding, dilation, layout, groups,
         ci_tile += 1
 
     # pack kernel
-    shape = (
-        num_filter // oc_bn,
-        ci_tile,
-        kernel_depth,
-        kernel_height,
-        kernel_width,
-        ic_bn,
-        oc_bn,
-    )
+    shape = (num_filter // oc_bn, ci_tile, kernel_depth, kernel_height, kernel_width, ic_bn, oc_bn)
     kernel_vec = te.compute(
         shape,
         lambda CO, CI, d, h, w, ci, co: kernel[CO * oc_bn + co, CI * ic_bn + ci, d, h, w],