[TOPI] Fix mali conv2d performance regression #3131

Merged · 3 commits · May 5, 2019
python/tvm/autotvm/tophub.py: 1 addition & 1 deletion
@@ -42,7 +42,7 @@
     'cuda': "v0.04",
     'rocm': "v0.02",
     'opencl': "v0.02",
-    'mali': "v0.04",
+    'mali': "v0.05",

     'vta': "v0.04",
 }
topi/python/topi/mali/conv2d.py: 4 additions & 3 deletions
@@ -352,9 +352,10 @@ def _decl_winograd(cfg, data, kernel, strides, padding, dilation, layout, out_dtype)
     # unpack output
     output = tvm.compute((N, CO, H, W), lambda n, co, h, w:
                          Y[co][n * nH * nW + (h//m) * nW + w//m][h % m][w % m]
-                         # thw following term is used to make the padding effective,
-                         # otherwise the padding will be eliminated by bound inference
-                         + tvm.const(0, out_dtype) * M[alpha-1][alpha-1][CO-1][P_round-1],
+                         # thw following hack term is used to make the padding in batch gemm ("M")
+                         # effective, otherwise the padding will be eliminated by bound inference
+                         + tvm.expr.Mul(tvm.const(0, out_dtype),
Member:

I suggest leaving a comment pointing to issue #3088 so people understand why Mul is used instead of *.

Member:

I'm still confused: why do we need this multiplication?

Contributor:

thw -> the

Member Author (@merrymercy, May 4, 2019):

@icemelon9 During batch gemm, we introduce some padding to avoid partial tiles, so we can safely vectorize the innermost loop. However, we don't use all of the output of batch gemm (the padded part is ignored in the final results). The InferBound pass in tvm analyzes the computation region from output to input and keeps only the necessary part. If we don't add this term, the padding added in batch gemm will be eliminated, regardless of how we tweak the shape argument in tvm.compute.
This term accesses the last element of the padded buffer, so it makes all of the padding effective.
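
To make the trick concrete, here is a minimal, hypothetical sketch (toy shapes and names, not the code from this PR) of keeping a padded intermediate alive through InferBound, assuming the TVM 0.x Python API used in this PR:

    import tvm

    # Hypothetical sizes: P real rows, padded up to P_round so the inner loop vectorizes.
    P, P_round = 30, 32
    A = tvm.placeholder((P_round,), name='A')
    # Stand-in for the padded batch gemm output "M".
    M = tvm.compute((P_round,), lambda p: A[p] * 2.0, name='M')
    # The consumer only reads P rows; without the extra term, InferBound would
    # shrink M's computed region to [0, P) and drop the padding.
    out = tvm.compute((P,), lambda p:
                      M[p]
                      # zero-valued access to the last padded element keeps the
                      # whole padded region effective without changing the result
                      + tvm.expr.Mul(tvm.const(0, 'float32'), M[P_round - 1]),
                      name='out')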

Member Author (@merrymercy, May 4, 2019):

@yzliu tvm.expr.Mul won't do constant folding, while * is equivalent to tvm.expr.Mul plus constant folding.
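
For context, a small check of that difference (assuming the TVM 0.x Python API used in this PR; the printed forms are only what the discussion above suggests):

    import tvm

    x = tvm.var('x', dtype='float32')
    zero = tvm.const(0, 'float32')

    folded = zero * x                 # the overloaded * applies constant folding, so 0 * x collapses
    explicit = tvm.expr.Mul(zero, x)  # the raw Mul node keeps the reference to x alive

    print(folded)    # expected: a plain constant zero, the multiplication is gone
    print(explicit)  # expected: the multiplication survives, e.g. (0f*x)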

Member:

Could you elaborate in the code comment with what you replied to @icemelon9?

Member Author:

It's too long to put in the comment.

+                                        M[alpha-1][alpha-1][CO-1][P_round-1]),
                          name='output', tag='winograd_conv2d_output')

     # we have to manually assign effective GFLOP for winograd