Format updated files
Krzysztof Parzyszek committed May 11, 2023
1 parent 9472d8b commit b44ebdc
Showing 12 changed files with 61 additions and 168 deletions.
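
Every hunk in this commit re-wraps code without changing behavior, which is what an automatic formatter produces. As a minimal sketch (the exact tool and settings behind this commit are an assumption, not stated anywhere in the diff), the same rewrites can be reproduced with black's Python API at a 100-character line length with the magic trailing comma disabled; that setting is what lets a trailing-comma'd call such as the import in conv2d.py below collapse onto one line:

# Minimal sketch: reproducing this style of reformatting with black.
# Assumptions (not stated in the commit): the tool is black, the line
# length is 100, and the "magic trailing comma" is disabled, i.e. the
# equivalent of running `black -l 100 --skip-magic-trailing-comma`.
import black

SRC = (
    "from .mprofile.dsp.conv2d import (\n"
    "    conv2d_nhwc_dsp_compute,\n"
    "    conv2d_nhwc_dsp_schedule,\n"
    ")\n"
)

mode = black.Mode(line_length=100, magic_trailing_comma=False)
print(black.format_str(SRC, mode=mode), end="")
# Output matches the first hunk's "+" line:
# from .mprofile.dsp.conv2d import conv2d_nhwc_dsp_compute, conv2d_nhwc_dsp_schedule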
13 changes: 2 additions & 11 deletions python/tvm/topi/arm_cpu/conv2d.py
@@ -33,10 +33,7 @@
     schedule_conv2d_spatial_pack_nchw,
     schedule_conv2d_spatial_pack_nhwc,
 )
-from .mprofile.dsp.conv2d import (
-    conv2d_nhwc_dsp_compute,
-    conv2d_nhwc_dsp_schedule,
-)
+from .mprofile.dsp.conv2d import conv2d_nhwc_dsp_compute, conv2d_nhwc_dsp_schedule


 @autotvm.register_topi_compute("conv2d_nchw_spatial_pack.arm_cpu")
@@ -267,13 +264,7 @@ def _schedule_winograd(cfg, s, output, last):
     if isinstance(U.op, tvm.te.ComputeOp):
         kernel, G = U.op.input_tensors
         s[G].compute_inline()
-        (
-            eps,
-            nu,
-            k,
-            c,
-            kk,
-        ) = s[U].op.axis
+        (eps, nu, k, c, kk) = s[U].op.axis
         if autotvm.GLOBAL_SCOPE.in_tuning:
             # kernel transformation will be pre-computed during compilation, so we skip
             # this part to make tuning records correct
4 changes: 1 addition & 3 deletions python/tvm/topi/arm_cpu/conv2d_alter_op.py
@@ -152,9 +152,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
         )
         dispatch_ctx.update(target, new_workload, cfg)
         return relay.nn.conv2d(
-            inputs[0],
-            relay.Constant(tvm.nd.array(reshaped_new_kernel)),
-            **new_attrs,
+            inputs[0], relay.Constant(tvm.nd.array(reshaped_new_kernel)), **new_attrs
         )

     # Only microTVM does layout alteration for NHWC layout with real data types
16 changes: 2 additions & 14 deletions python/tvm/topi/cuda/dense.py
@@ -29,13 +29,7 @@


 def _matmul_cublas_common(
-    cfg,
-    tensor_a,
-    tensor_b,
-    bias=None,
-    out_dtype=None,
-    transpose_a=False,
-    transpose_b=False,
+    cfg, tensor_a, tensor_b, bias=None, out_dtype=None, transpose_a=False, transpose_b=False
 ):
     assert len(tensor_a.shape) == 2 and len(tensor_b.shape) == 2, "only support 2-dim matmul"
     if bias is not None:
@@ -58,13 +52,7 @@ def _matmul_cublas_common(

 @autotvm.register_topi_compute("matmul_cublas.cuda")
 def matmul_cublas(
-    cfg,
-    tensor_a,
-    tensor_b,
-    bias=None,
-    out_dtype=None,
-    transpose_a=False,
-    transpose_b=False,
+    cfg, tensor_a, tensor_b, bias=None, out_dtype=None, transpose_a=False, transpose_b=False
 ):
     """Matmul operator on CUDA with CUBLAS"""
     return _matmul_cublas_common(cfg, tensor_a, tensor_b, bias, out_dtype, transpose_a, transpose_b)
7 changes: 4 additions & 3 deletions python/tvm/topi/cuda/sparse.py
@@ -159,9 +159,10 @@ def gen_ir(data, w_data, w_indices, w_indptr, out):
         bs_m = bs_n
         mb = m // bs_m
         mi = warp_size
-        assert (
-            mb >= mi
-        ), f"Number of block rows in dense matrix must be larger than warp size: {warp_size} vs {mb}."
+        assert mb >= mi, (
+            f"Number of block rows in dense matrix must be larger than warp size: "
+            f"{warp_size} vs {mb}."
+        )
         mo = ceil_div(mb, mi)
         ni = 1  # TODO(tkonolige): how do I compute the number of warps per block?
         no = ceil_div(nb, ni)
16 changes: 12 additions & 4 deletions python/tvm/topi/generic/conv2d.py
@@ -45,8 +45,12 @@ def fallback_schedule_cpu_common_int8(cfg, wkl, int32_lanes, num_int8_elements):
     dilated_kernel_w = (wkl.kernel_w - 1) * wkl.dilation_w + 1
     out_width = (wkl.width + pl + pr - dilated_kernel_w) // WSTR + 1

-    assert wkl.out_filter % int32_lanes == 0, f"wkl.out_filter={wkl.out_filter}, int32_lanes={int32_lanes}"
-    assert wkl.in_filter % num_int8_elements == 0, f"wkl.in_filter={wkl.in_filter}, num_int8_elements={num_int8_elements}"
+    assert (
+        wkl.out_filter % int32_lanes == 0
+    ), f"wkl.out_filter={wkl.out_filter}, int32_lanes={int32_lanes}"
+    assert (
+        wkl.in_filter % num_int8_elements == 0
+    ), f"wkl.in_filter={wkl.in_filter}, num_int8_elements={num_int8_elements}"

     oc_bn = int32_lanes if int32_lanes >= num_int8_elements else num_int8_elements
     ic_bn = 1
@@ -87,8 +91,12 @@ def fallback_schedule_cpu_1x1_int8(cfg, wkl, int32_lanes, num_int8_elements):
     out_height = (wkl.height + pt + pb - wkl.kernel_h) // HSTR + 1
     out_width = (wkl.width + pl + pr - wkl.kernel_w) // WSTR + 1

-    assert wkl.out_filter % int32_lanes == 0, f"wkl.out_filter={wkl.out_filter}, int32_lanes={int32_lanes}"
-    assert wkl.in_filter % num_int8_elements == 0, f"wkl.in_filter={wkl.in_filter}, num_int8_elements={num_int8_elements}"
+    assert (
+        wkl.out_filter % int32_lanes == 0
+    ), f"wkl.out_filter={wkl.out_filter}, int32_lanes={int32_lanes}"
+    assert (
+        wkl.in_filter % num_int8_elements == 0
+    ), f"wkl.in_filter={wkl.in_filter}, num_int8_elements={num_int8_elements}"

     oc_bn = int32_lanes if int32_lanes >= num_int8_elements else num_int8_elements
     ic_bn = 1
45 changes: 4 additions & 41 deletions python/tvm/topi/image/resize.py
@@ -314,14 +314,7 @@ def _cast_output(value, data_dtype="float32", out_dtype=None):
     if boxes is not None:
         # TODO(mbrookhart): Find an example of this
         raise NotImplementedError("resize1d with image boxes not yet implemented")
-    in_x = get_inx(
-        x,
-        image_width,
-        target_width,
-        coordinate_transformation_mode,
-        roi[0],
-        roi[1],
-    )
+    in_x = get_inx(x, image_width, target_width, coordinate_transformation_mode, roi[0], roi[1])

     if method == "nearest_neighbor":
         if rounding_method == "":
@@ -332,35 +325,15 @@ def _cast_output(value, data_dtype="float32", out_dtype=None):

         closest_x_index = get_closest_index(in_x, rounding_method, boxes)

-        value = get_1d_pixel(
-            data,
-            layout,
-            image_width,
-            box_idx,
-            c,
-            closest_x_index,
-            cc,
-            inum,
-            ic,
-        )
+        value = get_1d_pixel(data, layout, image_width, box_idx, c, closest_x_index, cc, inum, ic)
     elif method == "linear":
         x_int = te.floor(in_x).astype("int32")

         x_lerp = in_x - x_int

         p = [0 for i in range(2)]
         for i in range(2):
-            p[i] = get_1d_pixel(
-                data,
-                layout,
-                image_width,
-                box_idx,
-                c,
-                x_int + i,
-                cc,
-                inum,
-                ic,
-            )
+            p[i] = get_1d_pixel(data, layout, image_width, box_idx, c, x_int + i, cc, inum, ic)

         value = _lerp(*p, x_lerp)

@@ -371,17 +344,7 @@ def _cast_output(value, data_dtype="float32", out_dtype=None):
         # Get the surrounding values
         p = [0 for i in range(4)]
         for i in range(4):
-            p[i] = get_1d_pixel(
-                data,
-                layout,
-                image_width,
-                box_idx,
-                c,
-                xint + i - 1,
-                cc,
-                inum,
-                ic,
-            )
+            p[i] = get_1d_pixel(data, layout, image_width, box_idx, c, xint + i - 1, cc, inum, ic)

         wx = _cubic_spline_weights(xfract, alpha)
         if exclude_outside:
63 changes: 15 additions & 48 deletions python/tvm/topi/nn/conv2d.py
@@ -1224,8 +1224,7 @@ def _conv2d_winograd_nhwc_impl(
     kernel_pack = te.compute(
         (alpha, alpha, CO, CI),
         lambda eps, nu, co, ci: te.sum(
-            weight[r_kh, r_kw, ci, co] * G[eps, r_kh] * G[nu, r_kw],
-            axis=[r_kh, r_kw],
+            weight[r_kh, r_kw, ci, co] * G[eps, r_kh] * G[nu, r_kw], axis=[r_kh, r_kw]
         ),
         name="kernel_pack",
     )
@@ -1243,10 +1242,7 @@ def _conv2d_winograd_nhwc_impl(
     input_tile = te.compute(
         (alpha, alpha, P, CI),
         lambda eps, nu, p, ci: data_pad[
-            p // (nH * nW),
-            ((p // nW) % nH) * m + eps,
-            (p % nW) * m + nu,
-            ci,
+            p // (nH * nW), ((p // nW) % nH) * m + eps, (p % nW) * m + nu, ci
         ],
         name="input_tile",
         attrs={"schedule_rule": "None"},
@@ -1258,8 +1254,7 @@ def _conv2d_winograd_nhwc_impl(
     data_pack = te.compute(
         (alpha, alpha, P, CI),
         lambda eps, nu, p, ci: te.sum(
-            input_tile[r_a, r_b, p, ci] * B[r_a, eps] * B[r_b, nu],
-            axis=[r_a, r_b],
+            input_tile[r_a, r_b, p, ci] * B[r_a, eps] * B[r_b, nu], axis=[r_a, r_b]
         ),
         name="data_pack",
         attrs={
@@ -1273,8 +1268,7 @@ def _conv2d_winograd_nhwc_impl(
     bgemm = te.compute(
         (alpha, alpha, P, CO),
         lambda eps, nu, p, co: te.sum(
-            data_pack[eps, nu, p, ci] * kernel_pack[eps, nu, co, ci],
-            axis=[ci],
+            data_pack[eps, nu, p, ci] * kernel_pack[eps, nu, co, ci], axis=[ci]
         ),
         name="bgemm",
         attrs=bgemm_attrs,
@@ -1290,8 +1284,7 @@ def _conv2d_winograd_nhwc_impl(
     inverse = te.compute(
         (m, m, P, CO),
         lambda vh, vw, p, co: te.sum(
-            bgemm[r_a, r_b, p, co] * A[r_a, vh] * A[r_b, vw],
-            axis=[r_a, r_b],
+            bgemm[r_a, r_b, p, co] * A[r_a, vh] * A[r_b, vw], axis=[r_a, r_b]
         ),
         name="inverse",
         attrs={
@@ -1303,12 +1296,7 @@ def _conv2d_winograd_nhwc_impl(
     # output
     output = te.compute(
         (N, H, W, CO),
-        lambda n, h, w, co: inverse[
-            h % m,
-            w % m,
-            n * nH * nW + (h // m) * nW + (w // m),
-            co,
-        ],
+        lambda n, h, w, co: inverse[h % m, w % m, n * nH * nW + (h // m) * nW + (w // m), co],
         name="conv2d_winograd",
     )

@@ -1358,12 +1346,7 @@ def _conv2d_winograd_nchw_impl(
     assert HSTR == 1 and WSTR == 1 and KH == 3 and KW == 3

     pt, pl, pb, pr = get_pad_tuple(padding, (KH, KW))
-    data_pad = pad(
-        data,
-        (0, 0, pt, pl),
-        (0, 0, pb, pr),
-        name="data_pad",
-    )
+    data_pad = pad(data, (0, 0, pt, pl), (0, 0, pb, pr), name="data_pad")

     r = KW
     m = tile_size
@@ -1382,8 +1365,7 @@ def _conv2d_winograd_nchw_impl(
     kernel_pack = te.compute(
         (alpha, alpha, CI, CO),
         lambda eps, nu, ci, co: te.sum(
-            weight[co, ci, r_kh, r_kw] * G[eps, r_kh] * G[nu, r_kw],
-            axis=[r_kh, r_kw],
+            weight[co, ci, r_kh, r_kw] * G[eps, r_kh] * G[nu, r_kw], axis=[r_kh, r_kw]
         ),
         name="kernel_pack",
     )
@@ -1401,10 +1383,7 @@ def _conv2d_winograd_nchw_impl(
     input_tile = te.compute(
         (CI, P, alpha, alpha),
         lambda ci, p, eps, nu: data_pad[
-            p // (nH * nW),
-            ci,
-            ((p // nW) % nH) * m + eps,
-            (p % nW) * m + nu,
+            p // (nH * nW), ci, ((p // nW) % nH) * m + eps, (p % nW) * m + nu
         ],
         name="input_tile",
         attrs={"schedule_rule": "None"},
@@ -1416,22 +1395,18 @@ def _conv2d_winograd_nchw_impl(
     data_pack = te.compute(
         (alpha, alpha, CI, P),
         lambda eps, nu, ci, p: te.sum(
-            input_tile[ci, p, r_a, r_b] * B[r_a, eps] * B[r_b, nu],
-            axis=[r_a, r_b],
+            input_tile[ci, p, r_a, r_b] * B[r_a, eps] * B[r_b, nu], axis=[r_a, r_b]
         ),
         name="data_pack",
-        attrs={
-            "schedule_rule": "conv2d_nchw_winograd_data_pack",
-        },
+        attrs={"schedule_rule": "conv2d_nchw_winograd_data_pack"},
     )

     # do batch gemm
     ci = te.reduce_axis((0, CI), name="ci")
     bgemm = te.compute(
         (alpha, alpha, CO, P),
         lambda eps, nu, co, p: te.sum(
-            data_pack[eps, nu, ci, p] * kernel_pack[eps, nu, ci, co],
-            axis=[ci],
+            data_pack[eps, nu, ci, p] * kernel_pack[eps, nu, ci, co], axis=[ci]
         ),
         name="bgemm",
         attrs=bgemm_attrs,
@@ -1443,24 +1418,16 @@ def _conv2d_winograd_nchw_impl(
     inverse = te.compute(
         (CO, P, m, m),
         lambda co, p, vh, vw: te.sum(
-            bgemm[r_a, r_b, co, p] * A[r_a, vh] * A[r_b, vw],
-            axis=[r_a, r_b],
+            bgemm[r_a, r_b, co, p] * A[r_a, vh] * A[r_b, vw], axis=[r_a, r_b]
         ),
         name="inverse",
-        attrs={
-            "schedule_rule": "conv2d_nchw_winograd_inverse",
-        },
+        attrs={"schedule_rule": "conv2d_nchw_winograd_inverse"},
     )

     # output
     output = te.compute(
         (N, CO, H, W),
-        lambda n, co, h, w: inverse[
-            co,
-            n * nH * nW + (h // m) * nW + (w // m),
-            h % m,
-            w % m,
-        ],
+        lambda n, co, h, w: inverse[co, n * nH * nW + (h // m) * nW + (w // m), h % m, w % m],
         name="conv2d_winograd",
     )

Expand Down
25 changes: 11 additions & 14 deletions python/tvm/topi/nn/depthwise_conv2d.py
@@ -65,27 +65,24 @@ def _get_workload(data, kernel, stride, padding, dilation, out_dtype, data_layou
     elif data_layout == "NCHWc":
         _, in_channel_chunk, height, width, in_channel_block = get_const_tuple(data.shape)
         in_channel = in_channel_chunk * in_channel_block
-        (
-            filter_channel_chunk,
-            cm_chunk,
-            kh,
-            kw,
-            cm_block,
-            filter_channel_block,
-        ) = get_const_tuple(kernel.shape)
+        (filter_channel_chunk, cm_chunk, kh, kw, cm_block, filter_channel_block) = get_const_tuple(
+            kernel.shape
+        )
         filter_channel = filter_channel_chunk * filter_channel_block
         channel_multiplier = cm_chunk * cm_block

-        assert (
-            in_channel_block == filter_channel_block
-        ), f"Incorrect dimensions, data has block size {in_channel_block}, but filter has block size {filter_channel_block}"
+        assert in_channel_block == filter_channel_block, (
+            f"Incorrect dimensions, data has block size {in_channel_block}, but filter has "
+            f"block size {filter_channel_block}"
+        )

     else:
         raise ValueError(f"Data layout {data_layout} not supported")

-    assert (
-        in_channel == filter_channel
-    ), f"Incorrect dimensions, data has {in_channel} channels but filter expects {filter_channel} channels"
+    assert in_channel == filter_channel, (
+        f"Incorrect dimensions, data has {in_channel} channels but filter expects "
+        f"{filter_channel} channels"
+    )

     out_channel = filter_channel * channel_multiplier
     dilation_h, dilation_w = (
7 changes: 4 additions & 3 deletions python/tvm/topi/nn/utils.py
@@ -194,9 +194,10 @@ def get_pad_tuple_generic(padding, kernel):
         if len(padding) == len(kernel):
             pad_dimensions = [p * 2 for p in padding]
         elif len(padding) == len(kernel) * 2:
-            return [padding[i] for i in range(len(kernel))], [
-                padding[len(kernel) + i] for i in range(len(kernel))
-            ]
+            return (
+                [padding[i] for i in range(len(kernel))],
+                [padding[len(kernel) + i] for i in range(len(kernel))],
+            )
         else:
             raise ValueError("Size of padding can only be len(kernel) or len(kernel) * 2")
     elif isinstance(padding, int):