diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py
index 25367bb7b04c4..3240ebcd515c8 100644
--- a/python/tvm/topi/cuda/scan.py
+++ b/python/tvm/topi/cuda/scan.py
@@ -104,7 +104,7 @@ def exclusive_scan_ir(data, output, reduction=None, binop=tvm.tir.generic.add, i
     # The following algorithm performs parallel exclusive scan
     # Up Sweep of exclusive scan
     lim = tvm.tir.generic.cast(
-        tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(scan_axis_size, "float32"))), "int64"
+        tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(scan_axis_size, "float64"))), "int64"
     )
     with ib.for_range(0, lim, dtype="int64") as l2_width:
         width = 2 << l2_width
diff --git a/python/tvm/topi/cuda/sort.py b/python/tvm/topi/cuda/sort.py
index 5e6108737cd66..5ebd3060a6bbf 100644
--- a/python/tvm/topi/cuda/sort.py
+++ b/python/tvm/topi/cuda/sort.py
@@ -239,7 +239,7 @@ def compare(a, b):
     # Sort the lower levels of the merge using odd-even sort, it's fast for small inputs
     lower_lim = tvm.tir.generic.cast(
-        tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(block_size, "float32"))), "int64"
+        tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(block_size, "float64"))), "int64"
     )
 
     _odd_even_sort(
@@ -255,7 +255,7 @@ def compare(a, b):
     )
 
     upper_lim = tvm.tir.generic.cast(
-        tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(size, "float32"))), "int64"
+        tvm.tir.ceil(tvm.tir.log2(tvm.tir.generic.cast(size, "float64"))), "int64"
     )
 
     def get_merge_begin(source, base_idx, aCount, bCount, aStart, bStart, diag, step_count):
diff --git a/src/target/spirv/ir_builder.h b/src/target/spirv/ir_builder.h
index 16bff657bfe73..c06ec0acf341d 100644
--- a/src/target/spirv/ir_builder.h
+++ b/src/target/spirv/ir_builder.h
@@ -491,7 +491,7 @@ class IRBuilder {
    */
   Value GetPushConstant(Value ptr_push_const, const SType& v_type, uint32_t index);
 
-  // TODO doc
+  // TODO(masahi): doc
   Value DeclareUniformBuffer(const std::vector<SType>& value_types, uint32_t binding);
   Value GetUniform(Value ptr_ubo, const SType& v_type, uint32_t index);
   /*!
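A note on why the cast widens to float64 (commentary, not part of the diff): float32 has a 24-bit significand, so integers above 2**24 are rounded to the nearest representable value. For a scan or sort extent like 2**24 + 1, casting to float32 rounds the value down to exactly 2**24, log2 then returns exactly 24.0, and ceil yields 24 rather than 25, so the level-count loops driven by `lim`, `lower_lim`, and `upper_lim` would run one iteration short and leave the top level of the sweep or merge unprocessed. The sketch below reproduces the discrepancy in plain NumPy, outside of TVM; `n`, `lim_f32`, and `lim_f64` are illustrative names, not identifiers from the patch.

```python
import numpy as np

# Hypothetical extent just past float32's exact-integer range (2**24).
n = 2**24 + 1  # 16777217

# float32 rounds n down to 2**24, so the ceil(log2(...)) chain under-counts.
lim_f32 = int(np.ceil(np.log2(np.float32(n))))  # 24 -- one level short
lim_f64 = int(np.ceil(np.log2(np.float64(n))))  # 25 -- correct

assert lim_f32 == 24
assert lim_f64 == 25
```

With the widened cast, the computed level count stays exact for any extent below 2**53 (float64's exact-integer range), which comfortably covers practical tensor sizes.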