Skip to content

Commit

Permalink
Increase default split factor from 16 to 256 in the case of VE
Browse files (browse the repository at this point in the history)
  • Loading branch information
saudet committed Jan 31, 2022
1 parent 54c0d4d commit 0694ae3
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 13 deletions.
18 changes: 6 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,11 @@ which provide support for the NEC SX-Aurora TSUBASA Vector Engine (VE).
export RV_REPORT=1
export RV_FORCE_WIDTH=256
export RV_FORCE_FUNCTIONS=\
fused_nn_dense_add_compute___tvm_parallel_lambda,\
fused_nn_dense_add_1_compute___tvm_parallel_lambda,\
fused_nn_dense_add_3_compute___tvm_parallel_lambda,\
fused_nn_dense_add_4_compute___tvm_parallel_lambda,\
fused_take_transpose_contrib_reverse_reshape_transpose_1_compute___tvm_parallel_lambda,\
fused_contrib_reverse_reshape_transpose_reshape_1_compute___tvm_parallel_lambda,\
fused_reshape_sequence_mask_1_compute___tvm_parallel_lambda,\
fused_reshape_7_compute___tvm_parallel_lambda,\
fused_reshape_6_compute___tvm_parallel_lambda,\
fused_contrib_reverse_reshape_7_compute___tvm_parallel_lambda
export RV_FORCE_LOOPS=for_body,for_body2,for_body2.us.us,for_body5,for_body5.us.us
fused_reshape_add_cast_expand_dims_broadcast_to_reshape_2_compute___tvm_parallel_lambda,\
fused_take_transpose_contrib_reverse_reshape_transpose_2_compute___tvm_parallel_lambda,\
fused_contrib_reverse_reshape_transpose_reshape_2_compute___tvm_parallel_lambda,\
fused_subtract_add_sqrt_divide_multiply_add_2_compute___tvm_parallel_lambda,
export RV_FORCE_LOOPS=for_body,for_body2,for_body5,for_body5.1,for_body5.2,for_body5.us.us,for_body5.us.us.1,for_body5.us.us.2
make
lib/cpp_deploy_normal #
lib/cpp_deploy_pack # small functions just to test
Expand All @@ -88,7 +82,7 @@ which provide support for the NEC SX-Aurora TSUBASA Vector Engine (VE).
```

* Darknet models like YOLOv2, YOLOv3, etc
```
```bash
export TVM_HOME=$(pwd)/tvm/
export PYTHONPATH=$TVM_HOME/python:${PYTHONPATH}
cd tvm/tutorials/frontend
Expand Down
6 changes: 5 additions & 1 deletion python/tvm/topi/x86/injective.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.
# pylint: disable=invalid-name
"""x86 declaration and schedules."""
import tvm
from tvm import te
from tvm.tir import IntImm
from ..utils import is_empty_shape
Expand Down Expand Up @@ -45,10 +46,13 @@ def schedule_injective_from_existing(sch, out):
elif len(sch[out].op.axis) >= 1:
sch[out].parallel(sch[out].op.axis[0])

target = tvm.target.Target.current(allow_none=False)
factor = 256 if "ve-linux" in target.attrs.get("mtriple", "") else 16

# Vectorize the inner most for loop. Tiling first to get a const extent
if len(sch[out].op.axis) >= 1:
l = sch[out].op.axis[-1]
lo, li = sch[out].split(l, factor=16)
lo, li = sch[out].split(l, factor=factor)
sch[out].vectorize(li)

# for 1D loop, the above split will break the parallel axis
Expand Down

0 comments on commit 0694ae3

Please sign in to comment.