Skip to content

Commit

Permalink
update
Browse files: browse the repository at this point in the history
  • Loading branch information
masahi committed Feb 22, 2021
1 parent 533026b commit c1629b3
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
5 changes: 4 additions & 1 deletion python/tvm/relay/op/strategy/cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,9 @@ def judge_winograd(
OH = (H + pt + pb - KH) // stride_h + 1
OW = (W + pl + pr - KW) // stride_w + 1
nH, nW = (OH + tile_size - 1) // tile_size, (OW + tile_size - 1) // tile_size

+ if not isinstance(N, int):
+     return False, False, False
P = N * nH * nW

judge_winograd_tensorcore = (
Expand Down Expand Up @@ -705,7 +708,7 @@ def dense_strategy_cuda(attrs, inputs, out_type, target):
name="dense_tensorcore.cuda",
plevel=20,
)
- if target.kind.name == "cuda" and "cublas" in target.libs:
+ if target.kind.name in ["cuda", "nvptx"] and "cublas" in target.libs:
strategy.add_implementation(
wrap_compute_dense(topi.cuda.dense_cublas),
wrap_topi_schedule(topi.cuda.schedule_dense_cublas),
Expand Down
2 changes: 1 addition & 1 deletion python/tvm/topi/cuda/conv2d_nhwc.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,4 +129,4 @@ def schedule_conv2d_nhwc_direct(cfg, s, Conv):

N, OH, OW, CO = get_const_tuple(output.shape)
KH, KW, CI, _ = get_const_tuple(kernel.shape)
- cfg.add_flop(2 * N * OH * OW * CO * CI * KH * KW)
+ # cfg.add_flop(2 * N * OH * OW * CO * CI * KH * KW)

0 comments on commit c1629b3

Please sign in to comment.