From 5f9f82dd74e51f316a9aeac5d455d1aec4981bab Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 21 Mar 2021 07:55:30 +0900 Subject: [PATCH] do not use float64 --- python/tvm/topi/cuda/scan.py | 2 +- python/tvm/topi/cuda/sort.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py index 3240ebcd515c..25367bb7b04c 100644 --- a/python/tvm/topi/cuda/scan.py +++ b/python/tvm/topi/cuda/scan.py @@ -104,7 +104,7 @@ def exclusive_scan_ir(data, output, reduction=None, binop=tvm.tir.generic.add, i # The following algorithm performs parallel exclusive scan # Up Sweep of exclusive scan lim = tvm.tir.generic.cast( - tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(scan_axis_size, "float64"))), "int64" + tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(scan_axis_size, "float32"))), "int64" ) with ib.for_range(0, lim, dtype="int64") as l2_width: width = 2 << l2_width diff --git a/python/tvm/topi/cuda/sort.py b/python/tvm/topi/cuda/sort.py index 5ebd3060a6bb..5e6108737cd6 100644 --- a/python/tvm/topi/cuda/sort.py +++ b/python/tvm/topi/cuda/sort.py @@ -239,7 +239,7 @@ def compare(a, b): # Sort the lower levels of the merge using odd-even sort, it's fast for small inputs lower_lim = tvm.tir.generic.cast( - tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(block_size, "float64"))), "int64" + tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(block_size, "float32"))), "int64" ) _odd_even_sort( @@ -255,7 +255,7 @@ def compare(a, b): ) upper_lim = tvm.tir.generic.cast( - tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(size, "float64"))), "int64" + tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(size, "float32"))), "int64" ) def get_merge_begin(source, base_idx, aCount, bCount, aStart, bStart, diag, step_count):