[Refactor] Remove dead code from depthwise_conv2d for Intel graphics
After fix a66186b, I saw that the same fix was needed in depthwise_conv2d for Intel graphics. The removed code was never used and simply duplicates cuda/depthwise_conv2d.py, so we can fall back to the CUDA implementation if it is ever needed.
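
For reference, a minimal, hypothetical sketch (not part of this commit) of what falling back to the generic CUDA NHWC schedule could look like for an Intel graphics target; the target string, tensor shapes, and variable names below are illustrative assumptions only:

import tvm
from tvm import te, topi

# Hypothetical NHWC depthwise workload (shapes chosen only for illustration).
data = te.placeholder((1, 64, 64, 32), name="data")
kernel = te.placeholder((3, 3, 32, 1), name="kernel")
conv = topi.nn.depthwise_conv2d_nhwc(data, kernel, stride=1, padding=1, dilation=1)

# Instead of a dedicated intel_graphics schedule, reuse the CUDA one;
# it picks up max_num_threads from the current (OpenCL) target.
with tvm.target.Target("opencl -device=intel_graphics"):
    s = topi.cuda.schedule_depthwise_conv2d_nhwc([conv])
    mod = tvm.lower(s, [data, kernel, conv])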
echuraev committed Jul 1, 2021
1 parent 2e47947 commit cbd8d5d
Showing 1 changed file with 0 additions and 180 deletions.
180 changes: 0 additions & 180 deletions python/tvm/topi/intel_graphics/depthwise_conv2d.py
@@ -136,186 +136,6 @@ def _callback(op):
    return s


def schedule_depthwise_conv2d_nhwc(outs):
    """Schedule for depthwise_conv2d nhwc forward.
    Parameters
    ----------
    outs: Array of Tensor
        The computation graph description of depthwise_conv2d
        in the format of an array of tensors.
    Returns
    -------
    s: Schedule
        The computation schedule for depthwise_conv2d nhwc.
    """
    outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
    s = te.create_schedule([x.op for x in outs])

    def _schedule(temp, Filter, DepthwiseConv2d):
        s[temp].compute_inline()
        FS = s.cache_read(Filter, "shared", [DepthwiseConv2d])
        if DepthwiseConv2d.op in s.outputs:
            Output = DepthwiseConv2d
            CL = s.cache_write(DepthwiseConv2d, "local")
        else:
            Output = outs[0].op.output(0)
            s[DepthwiseConv2d].set_scope("local")

        block_x = te.thread_axis("blockIdx.x")
        thread_x = te.thread_axis("threadIdx.x")

        b, h, w, c = s[Output].op.axis

        # num_thread here could be 728, it is larger than cuda.max_num_threads
        num_thread = tvm.arith.Analyzer().simplify(temp.shape[3]).value
        target = tvm.target.Target.current()
        if target and (target.kind.name not in ["cuda", "nvptx"]):
            num_thread = target.max_num_threads
        xoc, xic = s[Output].split(c, factor=num_thread)
        s[Output].reorder(xoc, b, h, w, xic)
        xo, yo, _, _ = s[Output].tile(h, w, x_factor=2, y_factor=2)
        fused = s[Output].fuse(yo, xo)
        fused = s[Output].fuse(fused, b)
        fused = s[Output].fuse(fused, xoc)

        s[Output].bind(fused, block_x)
        s[Output].bind(xic, thread_x)

        if DepthwiseConv2d.op in s.outputs:
            s[CL].compute_at(s[Output], xic)
        else:
            s[DepthwiseConv2d].compute_at(s[Output], xic)

        _, _, ci, fi = s[FS].op.axis
        s[FS].compute_at(s[Output], fused)
        fused = s[FS].fuse(fi, ci)
        s[FS].bind(fused, thread_x)

    scheduled_ops = []

    def traverse(OP):
        """Internal traverse function"""
        # inline all one-to-one-mapping operators except the last stage (output)
        if tag.is_broadcast(OP.tag):
            if OP not in s.outputs:
                s[OP].compute_inline()
            for tensor in OP.input_tensors:
                if tensor.op.input_tensors and tensor.op not in scheduled_ops:
                    traverse(tensor.op)
        # schedule depthwise_conv2d
        if OP.tag == "depthwise_conv2d_nhwc":
            PaddedInput = OP.input_tensors[0]
            Filter = OP.input_tensors[1]
            if isinstance(Filter.op, tvm.te.ComputeOp) and "dilate" in Filter.op.tag:
                s[Filter].compute_inline()
            DepthwiseConv2d = OP.output(0)
            _schedule(PaddedInput, Filter, DepthwiseConv2d)

        scheduled_ops.append(OP)

    traverse(outs[0].op)
    return s


def schedule_depthwise_conv2d_backward_input_nhwc(outs):
    """Schedule for depthwise_conv2d nhwc backward wrt input.
    Parameters
    ----------
    outs: Array of Tensor
        The computation graph description of depthwise_conv2d
        backward wrt input in the format of an array of tensors.
    Returns
    -------
    s: Schedule
        The computation schedule for depthwise_conv2d backward
        wrt input with layout nhwc.
    """
    outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
    s = te.create_schedule([x.op for x in outs])

    def _schedule(Padded_out_grad, In_grad):
        s[Padded_out_grad].compute_inline()

        block_x = te.thread_axis("blockIdx.x")
        thread_x = te.thread_axis("threadIdx.x")
        _, h, w, c = In_grad.op.axis

        fused_hwc = s[In_grad].fuse(h, w, c)
        xoc, xic = s[In_grad].split(fused_hwc, factor=128)

        s[In_grad].bind(xoc, block_x)
        s[In_grad].bind(xic, thread_x)

    def traverse(OP):
        # inline all one-to-one-mapping operators except the last stage (output)
        if OP.tag == "depthwise_conv2d_backward_input_nhwc":
            Padded_out_grad = OP.input_tensors[0]
            Dilated_out_grad = Padded_out_grad.op.input_tensors[0]
            s[Dilated_out_grad].compute_inline()
            In_grad = OP.output(0)
            _schedule(Padded_out_grad, In_grad)
        else:
            raise ValueError("Depthwise conv backward wrt input for non-NHWC is not supported.")

    traverse(outs[0].op)
    return s


def schedule_depthwise_conv2d_backward_weight_nhwc(outs):
    """Schedule for depthwise_conv2d nhwc backward wrt weight.
    Parameters
    ----------
    outs: Array of Tensor
        The computation graph description of depthwise_conv2d
        backward wrt weight in the format of an array of tensors.
    Returns
    -------
    s: Schedule
        The computation schedule for depthwise_conv2d backward
        wrt weight with layout nhwc.
    """
    outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
    s = te.create_schedule([x.op for x in outs])

    def _schedule(Weight_grad):
        block_x = te.thread_axis("blockIdx.x")
        thread_y = te.thread_axis("threadIdx.y")
        thread_x = te.thread_axis("threadIdx.x")

        db, dh, dw = Weight_grad.op.reduce_axis

        fused_dbdhdw = s[Weight_grad].fuse(db, dh, dw)
        _, ki = s[Weight_grad].split(fused_dbdhdw, factor=8)
        BF = s.rfactor(Weight_grad, ki)

        fused_fwcm = s[Weight_grad].fuse(*s[Weight_grad].op.axis)

        xo, xi = s[Weight_grad].split(fused_fwcm, factor=32)

        s[Weight_grad].bind(xi, thread_x)
        s[Weight_grad].bind(xo, block_x)

        s[Weight_grad].bind(s[Weight_grad].op.reduce_axis[0], thread_y)
        s[BF].compute_at(s[Weight_grad], s[Weight_grad].op.reduce_axis[0])

    def traverse(OP):
        # inline all one-to-one-mapping operators except the last stage (output)
        if OP.tag == "depthwise_conv2d_backward_weight_nhwc":
            Padded_in = OP.input_tensors[1]
            s[Padded_in].compute_inline()
            Weight_grad = OP.output(0)
            _schedule(Weight_grad)
        else:
            raise ValueError("Depthwise conv backward wrt weight for non-NHWC is not supported.")

    traverse(outs[0].op)
    return s


@depthwise_conv2d_infer_layout.register("intel_graphics")
