Format updated files
Krzysztof Parzyszek committed May 11, 2023
1 parent 9472d8b commit b44ebdc
Showing 12 changed files with 61 additions and 168 deletions.
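
Every hunk in this commit re-wraps code without changing behavior, which is what an automatic formatter produces. As a minimal sketch (the exact tool and settings behind this commit are an assumption, not stated anywhere in the diff), the same rewrites can be reproduced with black's Python API at a 100-character line length with the magic trailing comma disabled; that setting is what lets a trailing-comma'd call such as the import in conv2d.py below collapse onto one line:

# Minimal sketch: reproducing this style of reformatting with black.
# Assumptions (not stated in the commit): the tool is black, the line
# length is 100, and the "magic trailing comma" is disabled, i.e. the
# equivalent of running `black -l 100 --skip-magic-trailing-comma`.
import black

SRC = (
    "from .mprofile.dsp.conv2d import (\n"
    "    conv2d_nhwc_dsp_compute,\n"
    "    conv2d_nhwc_dsp_schedule,\n"
    ")\n"
)

mode = black.Mode(line_length=100, magic_trailing_comma=False)
print(black.format_str(SRC, mode=mode), end="")
# Output matches the first hunk's "+" line:
# from .mprofile.dsp.conv2d import conv2d_nhwc_dsp_compute, conv2d_nhwc_dsp_schedule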
13 changes: 2 additions & 11 deletions python/tvm/topi/arm_cpu/conv2d.py
@@ -33,10 +33,7 @@
     schedule_conv2d_spatial_pack_nchw,
     schedule_conv2d_spatial_pack_nhwc,
 )
-from .mprofile.dsp.conv2d import (
-    conv2d_nhwc_dsp_compute,
-    conv2d_nhwc_dsp_schedule,
-)
+from .mprofile.dsp.conv2d import conv2d_nhwc_dsp_compute, conv2d_nhwc_dsp_schedule


 @autotvm.register_topi_compute("conv2d_nchw_spatial_pack.arm_cpu")
@@ -267,13 +264,7 @@ def _schedule_winograd(cfg, s, output, last):
     if isinstance(U.op, tvm.te.ComputeOp):
         kernel, G = U.op.input_tensors
         s[G].compute_inline()
-        (
-            eps,
-            nu,
-            k,
-            c,
-            kk,
-        ) = s[U].op.axis
+        (eps, nu, k, c, kk) = s[U].op.axis
         if autotvm.GLOBAL_SCOPE.in_tuning:
             # kernel transformation will be pre-computed during compilation, so we skip
             # this part to make tuning records correct
4 changes: 1 addition & 3 deletions python/tvm/topi/arm_cpu/conv2d_alter_op.py
@@ -152,9 +152,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
         )
         dispatch_ctx.update(target, new_workload, cfg)
         return relay.nn.conv2d(
-            inputs[0],
-            relay.Constant(tvm.nd.array(reshaped_new_kernel)),
-            **new_attrs,
+            inputs[0], relay.Constant(tvm.nd.array(reshaped_new_kernel)), **new_attrs
         )

     # Only microTVM does layout alteration for NHWC layout with real data types
16 changes: 2 additions & 14 deletions python/tvm/topi/cuda/dense.py
@@ -29,13 +29,7 @@


 def _matmul_cublas_common(
-    cfg,
-    tensor_a,
-    tensor_b,
-    bias=None,
-    out_dtype=None,
-    transpose_a=False,
-    transpose_b=False,
+    cfg, tensor_a, tensor_b, bias=None, out_dtype=None, transpose_a=False, transpose_b=False
 ):
     assert len(tensor_a.shape) == 2 and len(tensor_b.shape) == 2, "only support 2-dim matmul"
     if bias is not None:
@@ -58,13 +52,7 @@ def _matmul_cublas_common(

 @autotvm.register_topi_compute("matmul_cublas.cuda")
 def matmul_cublas(
-    cfg,
-    tensor_a,
-    tensor_b,
-    bias=None,
-    out_dtype=None,
-    transpose_a=False,
-    transpose_b=False,
+    cfg, tensor_a, tensor_b, bias=None, out_dtype=None, transpose_a=False, transpose_b=False
 ):
     """Matmul operator on CUDA with CUBLAS"""
     return _matmul_cublas_common(cfg, tensor_a, tensor_b, bias, out_dtype, transpose_a, transpose_b)
7 changes: 4 additions & 3 deletions python/tvm/topi/cuda/sparse.py
@@ -159,9 +159,10 @@ def gen_ir(data, w_data, w_indices, w_indptr, out):
         bs_m = bs_n
         mb = m // bs_m
         mi = warp_size
-        assert (
-            mb >= mi
-        ), f"Number of block rows in dense matrix must be larger than warp size: {warp_size} vs {mb}."
+        assert mb >= mi, (
+            f"Number of block rows in dense matrix must be larger than warp size: "
+            f"{warp_size} vs {mb}."
+        )
         mo = ceil_div(mb, mi)
         ni = 1  # TODO(tkonolige): how do I compute the number of warps per block?
         no = ceil_div(nb, ni)
16 changes: 12 additions & 4 deletions python/tvm/topi/generic/conv2d.py
@@ -45,8 +45,12 @@ def fallback_schedule_cpu_common_int8(cfg, wkl, int32_lanes, num_int8_elements):
     dilated_kernel_w = (wkl.kernel_w - 1) * wkl.dilation_w + 1
     out_width = (wkl.width + pl + pr - dilated_kernel_w) // WSTR + 1

-    assert wkl.out_filter % int32_lanes == 0, f"wkl.out_filter={wkl.out_filter}, int32_lanes={int32_lanes}"
-    assert wkl.in_filter % num_int8_elements == 0, f"wkl.in_filter={wkl.in_filter}, num_int8_elements={num_int8_elements}"
+    assert (
+        wkl.out_filter % int32_lanes == 0
+    ), f"wkl.out_filter={wkl.out_filter}, int32_lanes={int32_lanes}"
+    assert (
+        wkl.in_filter % num_int8_elements == 0
+    ), f"wkl.in_filter={wkl.in_filter}, num_int8_elements={num_int8_elements}"

     oc_bn = int32_lanes if int32_lanes >= num_int8_elements else num_int8_elements
     ic_bn = 1
@@ -87,8 +91,12 @@ def fallback_schedule_cpu_1x1_int8(cfg, wkl, int32_lanes, num_int8_elements):
     out_height = (wkl.height + pt + pb - wkl.kernel_h) // HSTR + 1
     out_width = (wkl.width + pl + pr - wkl.kernel_w) // WSTR + 1

-    assert wkl.out_filter % int32_lanes == 0, f"wkl.out_filter={wkl.out_filter}, int32_lanes={int32_lanes}"
-    assert wkl.in_filter % num_int8_elements == 0, f"wkl.in_filter={wkl.in_filter}, num_int8_elements={num_int8_elements}"
+    assert (
+        wkl.out_filter % int32_lanes == 0
+    ), f"wkl.out_filter={wkl.out_filter}, int32_lanes={int32_lanes}"
+    assert (
+        wkl.in_filter % num_int8_elements == 0
+    ), f"wkl.in_filter={wkl.in_filter}, num_int8_elements={num_int8_elements}"

     oc_bn = int32_lanes if int32_lanes >= num_int8_elements else num_int8_elements
     ic_bn = 1
45 changes: 4 additions & 41 deletions python/tvm/topi/image/resize.py
@@ -314,14 +314,7 @@ def _cast_output(value, data_dtype="float32", out_dtype=None):
     if boxes is not None:
         # TODO(mbrookhart): Find an example of this
         raise NotImplementedError("resize1d with image boxes not yet implemented")
-    in_x = get_inx(
-        x,
-        image_width,
-        target_width,
-        coordinate_transformation_mode,
-        roi[0],
-        roi[1],
-    )
+    in_x = get_inx(x, image_width, target_width, coordinate_transformation_mode, roi[0], roi[1])

     if method == "nearest_neighbor":
         if rounding_method == "":
@@ -332,35 +325,15 @@ def _cast_output(value, data_dtype="float32", out_dtype=None):

         closest_x_index = get_closest_index(in_x, rounding_method, boxes)

-        value = get_1d_pixel(
-            data,
-            layout,
-            image_width,
-            box_idx,
-            c,
-            closest_x_index,
-            cc,
-            inum,
-            ic,
-        )
+        value = get_1d_pixel(data, layout, image_width, box_idx, c, closest_x_index, cc, inum, ic)
     elif method == "linear":
         x_int = te.floor(in_x).astype("int32")

         x_lerp = in_x - x_int

         p = [0 for i in range(2)]
         for i in range(2):
-            p[i] = get_1d_pixel(
-                data,
-                layout,
-                image_width,
-                box_idx,
-                c,
-                x_int + i,
-                cc,
-                inum,
-                ic,
-            )
+            p[i] = get_1d_pixel(data, layout, image_width, box_idx, c, x_int + i, cc, inum, ic)

         value = _lerp(*p, x_lerp)

@@ -371,17 +344,7 @@ def _cast_output(value, data_dtype="float32", out_dtype=None):
         # Get the surrounding values
         p = [0 for i in range(4)]
         for i in range(4):
-            p[i] = get_1d_pixel(
-                data,
-                layout,
-                image_width,
-                box_idx,
-                c,
-                xint + i - 1,
-                cc,
-                inum,
-                ic,
-            )
+            p[i] = get_1d_pixel(data, layout, image_width, box_idx, c, xint + i - 1, cc, inum, ic)

         wx = _cubic_spline_weights(xfract, alpha)
         if exclude_outside:
63 changes: 15 additions & 48 deletions python/tvm/topi/nn/conv2d.py
@@ -1224,8 +1224,7 @@ def _conv2d_winograd_nhwc_impl(
     kernel_pack = te.compute(
         (alpha, alpha, CO, CI),
         lambda eps, nu, co, ci: te.sum(
-            weight[r_kh, r_kw, ci, co] * G[eps, r_kh] * G[nu, r_kw],
-            axis=[r_kh, r_kw],
+            weight[r_kh, r_kw, ci, co] * G[eps, r_kh] * G[nu, r_kw], axis=[r_kh, r_kw]
         ),
         name="kernel_pack",
     )
@@ -1243,10 +1242,7 @@ def _conv2d_winograd_nhwc_impl(
     input_tile = te.compute(
         (alpha, alpha, P, CI),
         lambda eps, nu, p, ci: data_pad[
-            p // (nH * nW),
-            ((p // nW) % nH) * m + eps,
-            (p % nW) * m + nu,
-            ci,
+            p // (nH * nW), ((p // nW) % nH) * m + eps, (p % nW) * m + nu, ci
         ],
         name="input_tile",
         attrs={"schedule_rule": "None"},
@@ -1258,8 +1254,7 @@ def _conv2d_winograd_nhwc_impl(
     data_pack = te.compute(
         (alpha, alpha, P, CI),
         lambda eps, nu, p, ci: te.sum(
-            input_tile[r_a, r_b, p, ci] * B[r_a, eps] * B[r_b, nu],
-            axis=[r_a, r_b],
+            input_tile[r_a, r_b, p, ci] * B[r_a, eps] * B[r_b, nu], axis=[r_a, r_b]
         ),
         name="data_pack",
         attrs={
@@ -1273,8 +1268,7 @@ def _conv2d_winograd_nhwc_impl(
     bgemm = te.compute(
         (alpha, alpha, P, CO),
         lambda eps, nu, p, co: te.sum(
-            data_pack[eps, nu, p, ci] * kernel_pack[eps, nu, co, ci],
-            axis=[ci],
+            data_pack[eps, nu, p, ci] * kernel_pack[eps, nu, co, ci], axis=[ci]
         ),
         name="bgemm",
         attrs=bgemm_attrs,
@@ -1290,8 +1284,7 @@ def _conv2d_winograd_nhwc_impl(
     inverse = te.compute(
         (m, m, P, CO),
         lambda vh, vw, p, co: te.sum(
-            bgemm[r_a, r_b, p, co] * A[r_a, vh] * A[r_b, vw],
-            axis=[r_a, r_b],
+            bgemm[r_a, r_b, p, co] * A[r_a, vh] * A[r_b, vw], axis=[r_a, r_b]
         ),
         name="inverse",
         attrs={
@@ -1303,12 +1296,7 @@ def _conv2d_winograd_nhwc_impl(
     # output
     output = te.compute(
         (N, H, W, CO),
-        lambda n, h, w, co: inverse[
-            h % m,
-            w % m,
-            n * nH * nW + (h // m) * nW + (w // m),
-            co,
-        ],
+        lambda n, h, w, co: inverse[h % m, w % m, n * nH * nW + (h // m) * nW + (w // m), co],
         name="conv2d_winograd",
     )

@@ -1358,12 +1346,7 @@ def _conv2d_winograd_nchw_impl(
     assert HSTR == 1 and WSTR == 1 and KH == 3 and KW == 3

     pt, pl, pb, pr = get_pad_tuple(padding, (KH, KW))
-    data_pad = pad(
-        data,
-        (0, 0, pt, pl),
-        (0, 0, pb, pr),
-        name="data_pad",
-    )
+    data_pad = pad(data, (0, 0, pt, pl), (0, 0, pb, pr), name="data_pad")

     r = KW
     m = tile_size
@@ -1382,8 +1365,7 @@ def _conv2d_winograd_nchw_impl(
     kernel_pack = te.compute(
         (alpha, alpha, CI, CO),
         lambda eps, nu, ci, co: te.sum(
-            weight[co, ci, r_kh, r_kw] * G[eps, r_kh] * G[nu, r_kw],
-            axis=[r_kh, r_kw],
+            weight[co, ci, r_kh, r_kw] * G[eps, r_kh] * G[nu, r_kw], axis=[r_kh, r_kw]
         ),
         name="kernel_pack",
     )
@@ -1401,10 +1383,7 @@ def _conv2d_winograd_nchw_impl(
     input_tile = te.compute(
         (CI, P, alpha, alpha),
         lambda ci, p, eps, nu: data_pad[
-            p // (nH * nW),
-            ci,
-            ((p // nW) % nH) * m + eps,
-            (p % nW) * m + nu,
+            p // (nH * nW), ci, ((p // nW) % nH) * m + eps, (p % nW) * m + nu
         ],
         name="input_tile",
         attrs={"schedule_rule": "None"},
@@ -1416,22 +1395,18 @@ def _conv2d_winograd_nchw_impl(
     data_pack = te.compute(
         (alpha, alpha, CI, P),
         lambda eps, nu, ci, p: te.sum(
-            input_tile[ci, p, r_a, r_b] * B[r_a, eps] * B[r_b, nu],
-            axis=[r_a, r_b],
+            input_tile[ci, p, r_a, r_b] * B[r_a, eps] * B[r_b, nu], axis=[r_a, r_b]
         ),
         name="data_pack",
-        attrs={
-            "schedule_rule": "conv2d_nchw_winograd_data_pack",
-        },
+        attrs={"schedule_rule": "conv2d_nchw_winograd_data_pack"},
     )

     # do batch gemm
     ci = te.reduce_axis((0, CI), name="ci")
     bgemm = te.compute(
         (alpha, alpha, CO, P),
         lambda eps, nu, co, p: te.sum(
-            data_pack[eps, nu, ci, p] * kernel_pack[eps, nu, ci, co],
-            axis=[ci],
+            data_pack[eps, nu, ci, p] * kernel_pack[eps, nu, ci, co], axis=[ci]
         ),
         name="bgemm",
         attrs=bgemm_attrs,
@@ -1443,24 +1418,16 @@ def _conv2d_winograd_nchw_impl(
     inverse = te.compute(
         (CO, P, m, m),
         lambda co, p, vh, vw: te.sum(
-            bgemm[r_a, r_b, co, p] * A[r_a, vh] * A[r_b, vw],
-            axis=[r_a, r_b],
+            bgemm[r_a, r_b, co, p] * A[r_a, vh] * A[r_b, vw], axis=[r_a, r_b]
         ),
         name="inverse",
-        attrs={
-            "schedule_rule": "conv2d_nchw_winograd_inverse",
-        },
+        attrs={"schedule_rule": "conv2d_nchw_winograd_inverse"},
     )

     # output
     output = te.compute(
         (N, CO, H, W),
-        lambda n, co, h, w: inverse[
-            co,
-            n * nH * nW + (h // m) * nW + (w // m),
-            h % m,
-            w % m,
-        ],
+        lambda n, co, h, w: inverse[co, n * nH * nW + (h // m) * nW + (w // m), h % m, w % m],
         name="conv2d_winograd",
     )

Expand Down
25 changes: 11 additions & 14 deletions python/tvm/topi/nn/depthwise_conv2d.py
@@ -65,27 +65,24 @@ def _get_workload(data, kernel, stride, padding, dilation, out_dtype, data_layou
     elif data_layout == "NCHWc":
         _, in_channel_chunk, height, width, in_channel_block = get_const_tuple(data.shape)
         in_channel = in_channel_chunk * in_channel_block
-        (
-            filter_channel_chunk,
-            cm_chunk,
-            kh,
-            kw,
-            cm_block,
-            filter_channel_block,
-        ) = get_const_tuple(kernel.shape)
+        (filter_channel_chunk, cm_chunk, kh, kw, cm_block, filter_channel_block) = get_const_tuple(
+            kernel.shape
+        )
         filter_channel = filter_channel_chunk * filter_channel_block
         channel_multiplier = cm_chunk * cm_block

-        assert (
-            in_channel_block == filter_channel_block
-        ), f"Incorrect dimensions, data has block size {in_channel_block}, but filter has block size {filter_channel_block}"
+        assert in_channel_block == filter_channel_block, (
+            f"Incorrect dimensions, data has block size {in_channel_block}, but filter has "
+            f"block size {filter_channel_block}"
+        )

     else:
         raise ValueError(f"Data layout {data_layout} not supported")

-    assert (
-        in_channel == filter_channel
-    ), f"Incorrect dimensions, data has {in_channel} channels but filter expects {filter_channel} channels"
+    assert in_channel == filter_channel, (
+        f"Incorrect dimensions, data has {in_channel} channels but filter expects "
+        f"{filter_channel} channels"
+    )

     out_channel = filter_channel * channel_multiplier
     dilation_h, dilation_w = (
7 changes: 4 additions & 3 deletions python/tvm/topi/nn/utils.py
@@ -194,9 +194,10 @@ def get_pad_tuple_generic(padding, kernel):
         if len(padding) == len(kernel):
             pad_dimensions = [p * 2 for p in padding]
         elif len(padding) == len(kernel) * 2:
-            return [padding[i] for i in range(len(kernel))], [
-                padding[len(kernel) + i] for i in range(len(kernel))
-            ]
+            return (
+                [padding[i] for i in range(len(kernel))],
+                [padding[len(kernel) + i] for i in range(len(kernel))],
+            )
         else:
             raise ValueError("Size of padding can only be len(kernel) or len(kernel) * 2")
     elif isinstance(padding, int):